LibreChat/api/models/tx.spec.js

1996 lines
81 KiB
JavaScript
Raw Normal View History

🧮 feat: Enhance Model Pricing Coverage and Pattern Matching (#10173) * updated gpt5-pro it is here and on openrouter https://platform.openai.com/docs/models/gpt-5-pro * feat: Add gpt-5-pro pricing - Implemented handling for the new gpt-5-pro model in the getValueKey function. - Updated tests to ensure correct behavior for gpt-5-pro across various scenarios. - Adjusted token limits and multipliers for gpt-5-pro in the tokens utility files. - Enhanced model matching functionality to include gpt-5-pro variations. * refactor: optimize model pricing and validation logic - Added new model pricing entries for llama2, llama3, and qwen variants in tx.js. - Updated tokenValues to include additional models and their pricing structures. - Implemented validation tests in tx.spec.js to ensure all models resolve correctly to pricing. - Refactored getValueKey function to improve model matching and resolution efficiency. - Removed outdated model entries from tokens.ts to streamline pricing management. * fix: add missing pricing * chore: update model pricing for qwen and gemma variants * chore: update model pricing and add validation for context windows - Removed outdated model entries from tx.js and updated tokenValues with new models. - Added a test in tx.spec.js to ensure all models with pricing have corresponding context windows defined in tokens.ts. - Introduced 'command-text' model pricing in tokens.ts to maintain consistency across model definitions. * chore: update model names and pricing for AI21 and Amazon models - Refactored model names in tx.js for AI21 and Amazon models to remove versioning and improve consistency. - Updated pricing values in tokens.ts to reflect the new model names. - Added comprehensive tests in tx.spec.js to validate pricing for both short and full model names across AI21 and Amazon models. * feat: add pricing and validation for Claude Haiku 4.5 model * chore: increase default max context tokens to 18000 for agents * feat: add Qwen3 model pricing and validation tests * chore: reorganize and update Qwen model pricing in tx.js and tokens.ts --------- Co-authored-by: khfung <68192841+khfung@users.noreply.github.com>
2025-10-19 09:23:27 -04:00
const { maxTokensMap } = require('@librechat/api');
🪨 feat: AWS Bedrock support (#3935) * feat: Add BedrockIcon component to SVG library * feat: EModelEndpoint.bedrock * feat: first pass, bedrock chat. note: AgentClient is returning `agents` as conversation.endpoint * fix: declare endpoint in initialization step * chore: Update @librechat/agents dependency to version 1.4.5 * feat: backend content aggregation for agents/bedrock * feat: abort agent requests * feat: AWS Bedrock icons * WIP: agent provider schema parsing * chore: Update EditIcon props type * refactor(useGenerationsByLatest): make agents and bedrock editable * refactor: non-assistant message content, parts * fix: Bedrock response `sender` * fix: use endpointOption.model_parameters not endpointOption.modelOptions * fix: types for step handler * refactor: Update Agents.ToolCallDelta type * refactor: Remove unnecessary assignment of parentMessageId in AskController * refactor: remove unnecessary assignment of parentMessageId (agent request handler) * fix(bedrock/agents): message regeneration * refactor: dynamic form elements using react-hook-form Controllers * fix: agent icons/labels for messages * fix: agent actions * fix: use of new dynamic tags causing application crash * refactor: dynamic settings touch-ups * refactor: update Slider component to allow custom track class name * refactor: update DynamicSlider component styles * refactor: use Constants value for GLOBAL_PROJECT_NAME (enum) * feat: agent share global methods/controllers * fix: agents query * fix: `getResponseModel` * fix: share prompt a11y issue * refactor: update SharePrompt dialog theme styles * refactor: explicit typing for SharePrompt * feat: add agent roles/permissions * chore: update @librechat/agents dependency to version 1.4.7 for tool_call_ids edge case * fix(Anthropic): messages.X.content.Y.tool_use.input: Input should be a valid dictionary * fix: handle text parts with tool_call_ids and empty text * fix: role initialization * refactor: don't make instructions required * refactor: improve typing of Text part * fix: setShowStopButton for agents route * chore: remove params for now * fix: add streamBuffer and streamRate to help prevent 'Overloaded' errors from Anthropic API * refactor: remove console.log statement in ContentRender component * chore: typing, rename Context to Delete Button * chore(DeleteButton): logging * refactor(Action): make accessible * style(Action): improve a11y again * refactor: remove use/mention of mongoose sessions * feat: first pass, sharing agents * feat: visual indicator for global agent, remove author when serving to non-author * wip: params * chore: fix typing issues * fix(schemas): typing * refactor: improve accessibility of ListCard component and fix console React warning * wip: reset templates for non-legacy new convos * Revert "wip: params" This reverts commit f8067e91d4adf7be9e0d9e914aaae79ac4689b80. * Revert "refactor: dynamic form elements using react-hook-form Controllers" This reverts commit 2150c4815d8c74a978a4b697aa8f54dc11e035d7. * fix(Parameters): types and parameter effect update to only update local state to parameters * refactor: optimize useDebouncedInput hook for better performance * feat: first pass, anthropic bedrock params * chore: paramEndpoints check for endpointType too * fix: maxTokens to use coerceNumber.optional(), * feat: extra chat model params * chore: reduce code repetition * refactor: improve preset title handling in SaveAsPresetDialog component * refactor: improve preset handling in HeaderOptions component * chore: improve typing, replace legacy dialog for SaveAsPresetDialog * feat: save as preset from parameters panel * fix: multi-search in select dropdown when using Option type * refactor: update default showDefault value to false in Dynamic components * feat: Bedrock presets settings * chore: config, fix agents schema, update config version * refactor: update AWS region variable name in bedrock options endpoint to BEDROCK_AWS_DEFAULT_REGION * refactor: update baseEndpointSchema in config.ts to include baseURL property * refactor: update createRun function to include req parameter and set streamRate based on provider * feat: availableRegions via config * refactor: remove unused demo agent controller file * WIP: title * Update @librechat/agents to version 1.5.0 * chore: addTitle.js to handle empty responseText * feat: support images and titles * feat: context token updates * Refactor BaseClient test to use expect.objectContaining * refactor: add model select, remove header options params, move side panel params below prompts * chore: update models list, catch title error * feat: model service for bedrock models (env) * chore: Remove verbose debug log in AgentClient class following stream * feat(bedrock): track token spend; fix: token rates, value key mapping for AWS models * refactor: handle streamRate in `handleLLMNewToken` callback * chore: AWS Bedrock example config in `.env.example` * refactor: Rename bedrockMeta to bedrockGeneral in settings.ts and use for AI21 and Amazon Bedrock providers * refactor: Update `.env.example` with AWS Bedrock model IDs URL and additional notes * feat: titleModel support for bedrock * refactor: Update `.env.example` with additional notes for AWS Bedrock model IDs
2024-09-09 12:06:59 -04:00
const { EModelEndpoint } = require('librechat-data-provider');
const {
defaultRate,
tokenValues,
getValueKey,
getMultiplier,
cacheTokenValues,
getCacheMultiplier,
} = require('./tx');
feat: Accurate Token Usage Tracking & Optional Balance (#1018) * refactor(Chains/llms): allow passing callbacks * refactor(BaseClient): accurately count completion tokens as generation only * refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM * wip: summary prompt tokens * refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end wip: draft out relevant providers and variables for token tracing * refactor(createLLM): make streaming prop false by default * chore: remove use of getTokenCountForResponse * refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace * chore: remove passing of streaming prop, also console log useful vars for tracing * feat: formatFromLangChain helper function to count tokens for ChatModelStart * refactor(initializeLLM): add role for LLM tracing * chore(formatFromLangChain): update JSDoc * feat(formatMessages): formats langChain messages into OpenAI payload format * chore: install openai-chat-tokens * refactor(formatMessage): optimize conditional langChain logic fix(formatFromLangChain): fix destructuring * feat: accurate prompt tokens for ChatModelStart before generation * refactor(handleChatModelStart): move to callbacks dir, use factory function * refactor(initializeLLM): rename 'role' to 'context' * feat(Balance/Transaction): new schema/models for tracking token spend refactor(Key): factor out model export to separate file * refactor(initializeClient): add req,res objects to client options * feat: add-balance script to add to an existing users' token balance refactor(Transaction): use multiplier map/function, return balance update * refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match * refactor(Tx): add fair fallback value multiplier incase the config result is undefined * refactor(Balance): rename 'tokens' to 'tokenCredits' * feat: balance check, add tx.js for new tx-related methods and tests * chore(summaryPrompts): update prompt token count * refactor(callbacks): pass req, res wip: check balance * refactor(Tx): make convoId a String type, fix(calculateTokenValue) * refactor(BaseClient): add conversationId as client prop when assigned * feat(RunManager): track LLM runs with manager, track token spend from LLM, refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls * feat(spendTokens): helper to spend prompt/completion tokens * feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds refactor(Balance): static check method to return object instead of boolean now wip(OpenAIClient): implement use of checkBalance * refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large refactor(OpenAIClient): add checkBalance refactor(createStartHandler): add checkBalance * chore: remove prompt and completion token logging from route handler * chore(spendTokens): add JSDoc * feat(logTokenCost): record transactions for basic api calls * chore(ask/edit): invoke getResponseSender only once per API call * refactor(ask/edit): pass promptTokens to getIds and include in abort data * refactor(getIds -> getReqData): rename function * refactor(Tx): increase value if incomplete message * feat: record tokenUsage when message is aborted * refactor: subtract tokens when payload includes function_call * refactor: add namespace for token_balance * fix(spendTokens): only execute if corresponding token type amounts are defined * refactor(checkBalance): throws Error if not enough token credits * refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run' * fix(abortMiddleware): circular dependency, and default to empty string for completionTokens * fix: properly cancel title requests when there isn't enough tokens to generate * feat(predictNewSummary): custom chain for summaries to allow signal passing refactor(summaryBuffer): use new custom chain * feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError * refactor(createStartHandler): if summary, add error details to runs * fix(OpenAIClient): support aborting from summarization & showing error to user refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer * refactor(logTokenCost -> recordTokenUsage): rename * refactor(checkBalance): include promptTokens in errorMessage * refactor(checkBalance/spendTokens): move to models dir * fix(createLanguageChain): correctly pass config * refactor(initializeLLM/title): add tokenBuffer of 150 for balance check * refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called * refactor(createStartHandler): add error to run if context is plugins as well * refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run * refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic * chore: use absolute equality for addTitle condition * refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional * style: icon changes to match official * fix(BaseClient): getTokenCountForResponse -> getTokenCount * fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc * refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled * fix(e2e/cleanUp): cleanup new collections, import all model methods from index * fix(config/add-balance): add uncaughtException listener * fix: circular dependency * refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance * fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped * fix(createStartHandler): correct condition for generations * chore: bump postcss due to moderate severity vulnerability * chore: bump zod due to low severity vulnerability * chore: bump openai & data-provider version * feat(types): OpenAI Message types * chore: update bun lockfile * refactor(CodeBlock): add error block formatting * refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON * chore(logViolation): delete user_id after error is logged * refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex * fix(DALL-E): use latest openai SDK * chore: reorganize imports, fix type issue * feat(server): add balance route * fix(api/models): add auth * feat(data-provider): /api/balance query * feat: show balance if checking is enabled, refetch on final message or error * chore: update docs, .env.example with token_usage info, add balance script command * fix(Balance): fallback to empty obj for balance query * style: slight adjustment of balance element * docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
describe('getValueKey', () => {
it('should return "16k" for model name containing "gpt-3.5-turbo-16k"', () => {
expect(getValueKey('gpt-3.5-turbo-16k-some-other-info')).toBe('16k');
});
it('should return "4k" for model name containing "gpt-3.5"', () => {
expect(getValueKey('gpt-3.5-some-other-info')).toBe('4k');
});
it('should return "32k" for model name containing "gpt-4-32k"', () => {
expect(getValueKey('gpt-4-32k-some-other-info')).toBe('32k');
});
it('should return "8k" for model name containing "gpt-4"', () => {
expect(getValueKey('gpt-4-some-other-info')).toBe('8k');
});
it('should return "gpt-5" for model name containing "gpt-5"', () => {
expect(getValueKey('gpt-5-some-other-info')).toBe('gpt-5');
expect(getValueKey('gpt-5-2025-01-30')).toBe('gpt-5');
expect(getValueKey('gpt-5-2025-01-30-0130')).toBe('gpt-5');
expect(getValueKey('openai/gpt-5')).toBe('gpt-5');
expect(getValueKey('openai/gpt-5-2025-01-30')).toBe('gpt-5');
expect(getValueKey('gpt-5-turbo')).toBe('gpt-5');
expect(getValueKey('gpt-5-0130')).toBe('gpt-5');
feat: Accurate Token Usage Tracking & Optional Balance (#1018) * refactor(Chains/llms): allow passing callbacks * refactor(BaseClient): accurately count completion tokens as generation only * refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM * wip: summary prompt tokens * refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end wip: draft out relevant providers and variables for token tracing * refactor(createLLM): make streaming prop false by default * chore: remove use of getTokenCountForResponse * refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace * chore: remove passing of streaming prop, also console log useful vars for tracing * feat: formatFromLangChain helper function to count tokens for ChatModelStart * refactor(initializeLLM): add role for LLM tracing * chore(formatFromLangChain): update JSDoc * feat(formatMessages): formats langChain messages into OpenAI payload format * chore: install openai-chat-tokens * refactor(formatMessage): optimize conditional langChain logic fix(formatFromLangChain): fix destructuring * feat: accurate prompt tokens for ChatModelStart before generation * refactor(handleChatModelStart): move to callbacks dir, use factory function * refactor(initializeLLM): rename 'role' to 'context' * feat(Balance/Transaction): new schema/models for tracking token spend refactor(Key): factor out model export to separate file * refactor(initializeClient): add req,res objects to client options * feat: add-balance script to add to an existing users' token balance refactor(Transaction): use multiplier map/function, return balance update * refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match * refactor(Tx): add fair fallback value multiplier incase the config result is undefined * refactor(Balance): rename 'tokens' to 'tokenCredits' * feat: balance check, add tx.js for new tx-related methods and tests * chore(summaryPrompts): update prompt token count * refactor(callbacks): pass req, res wip: check balance * refactor(Tx): make convoId a String type, fix(calculateTokenValue) * refactor(BaseClient): add conversationId as client prop when assigned * feat(RunManager): track LLM runs with manager, track token spend from LLM, refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls * feat(spendTokens): helper to spend prompt/completion tokens * feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds refactor(Balance): static check method to return object instead of boolean now wip(OpenAIClient): implement use of checkBalance * refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large refactor(OpenAIClient): add checkBalance refactor(createStartHandler): add checkBalance * chore: remove prompt and completion token logging from route handler * chore(spendTokens): add JSDoc * feat(logTokenCost): record transactions for basic api calls * chore(ask/edit): invoke getResponseSender only once per API call * refactor(ask/edit): pass promptTokens to getIds and include in abort data * refactor(getIds -> getReqData): rename function * refactor(Tx): increase value if incomplete message * feat: record tokenUsage when message is aborted * refactor: subtract tokens when payload includes function_call * refactor: add namespace for token_balance * fix(spendTokens): only execute if corresponding token type amounts are defined * refactor(checkBalance): throws Error if not enough token credits * refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run' * fix(abortMiddleware): circular dependency, and default to empty string for completionTokens * fix: properly cancel title requests when there isn't enough tokens to generate * feat(predictNewSummary): custom chain for summaries to allow signal passing refactor(summaryBuffer): use new custom chain * feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError * refactor(createStartHandler): if summary, add error details to runs * fix(OpenAIClient): support aborting from summarization & showing error to user refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer * refactor(logTokenCost -> recordTokenUsage): rename * refactor(checkBalance): include promptTokens in errorMessage * refactor(checkBalance/spendTokens): move to models dir * fix(createLanguageChain): correctly pass config * refactor(initializeLLM/title): add tokenBuffer of 150 for balance check * refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called * refactor(createStartHandler): add error to run if context is plugins as well * refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run * refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic * chore: use absolute equality for addTitle condition * refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional * style: icon changes to match official * fix(BaseClient): getTokenCountForResponse -> getTokenCount * fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc * refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled * fix(e2e/cleanUp): cleanup new collections, import all model methods from index * fix(config/add-balance): add uncaughtException listener * fix: circular dependency * refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance * fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped * fix(createStartHandler): correct condition for generations * chore: bump postcss due to moderate severity vulnerability * chore: bump zod due to low severity vulnerability * chore: bump openai & data-provider version * feat(types): OpenAI Message types * chore: update bun lockfile * refactor(CodeBlock): add error block formatting * refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON * chore(logViolation): delete user_id after error is logged * refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex * fix(DALL-E): use latest openai SDK * chore: reorganize imports, fix type issue * feat(server): add balance route * fix(api/models): add auth * feat(data-provider): /api/balance query * feat: show balance if checking is enabled, refetch on final message or error * chore: update docs, .env.example with token_usage info, add balance script command * fix(Balance): fallback to empty obj for balance query * style: slight adjustment of balance element * docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
});
it('should return "gpt-5.1" for model name containing "gpt-5.1"', () => {
expect(getValueKey('gpt-5.1')).toBe('gpt-5.1');
expect(getValueKey('gpt-5.1-chat')).toBe('gpt-5.1');
expect(getValueKey('gpt-5.1-codex')).toBe('gpt-5.1');
expect(getValueKey('openai/gpt-5.1')).toBe('gpt-5.1');
});
it('should return "gpt-5.2" for model name containing "gpt-5.2"', () => {
expect(getValueKey('gpt-5.2')).toBe('gpt-5.2');
expect(getValueKey('gpt-5.2-chat')).toBe('gpt-5.2');
expect(getValueKey('openai/gpt-5.2')).toBe('gpt-5.2');
});
it('should return "gpt-3.5-turbo-1106" for model name containing "gpt-3.5-turbo-1106"', () => {
expect(getValueKey('gpt-3.5-turbo-1106-some-other-info')).toBe('gpt-3.5-turbo-1106');
expect(getValueKey('openai/gpt-3.5-turbo-1106')).toBe('gpt-3.5-turbo-1106');
expect(getValueKey('gpt-3.5-turbo-1106/openai')).toBe('gpt-3.5-turbo-1106');
});
it('should return "gpt-4-1106" for model name containing "gpt-4-1106"', () => {
expect(getValueKey('gpt-4-1106-some-other-info')).toBe('gpt-4-1106');
expect(getValueKey('gpt-4-1106-vision-preview')).toBe('gpt-4-1106');
expect(getValueKey('gpt-4-1106-preview')).toBe('gpt-4-1106');
expect(getValueKey('openai/gpt-4-1106')).toBe('gpt-4-1106');
expect(getValueKey('gpt-4-1106/openai/')).toBe('gpt-4-1106');
});
it('should return "gpt-4-1106" for model type of "gpt-4-1106"', () => {
expect(getValueKey('gpt-4-vision-preview')).toBe('gpt-4-1106');
expect(getValueKey('openai/gpt-4-1106')).toBe('gpt-4-1106');
expect(getValueKey('gpt-4-turbo')).toBe('gpt-4-1106');
expect(getValueKey('gpt-4-0125')).toBe('gpt-4-1106');
});
it('should return "gpt-4.5" for model type of "gpt-4.5"', () => {
expect(getValueKey('gpt-4.5-preview')).toBe('gpt-4.5');
expect(getValueKey('gpt-4.5-2024-08-06')).toBe('gpt-4.5');
expect(getValueKey('gpt-4.5-2024-08-06-0718')).toBe('gpt-4.5');
expect(getValueKey('openai/gpt-4.5')).toBe('gpt-4.5');
expect(getValueKey('openai/gpt-4.5-2024-08-06')).toBe('gpt-4.5');
expect(getValueKey('gpt-4.5-turbo')).toBe('gpt-4.5');
expect(getValueKey('gpt-4.5-0125')).toBe('gpt-4.5');
});
it('should return "gpt-4.1" for model type of "gpt-4.1"', () => {
expect(getValueKey('gpt-4.1-preview')).toBe('gpt-4.1');
expect(getValueKey('gpt-4.1-2024-08-06')).toBe('gpt-4.1');
expect(getValueKey('gpt-4.1-2024-08-06-0718')).toBe('gpt-4.1');
expect(getValueKey('openai/gpt-4.1')).toBe('gpt-4.1');
expect(getValueKey('openai/gpt-4.1-2024-08-06')).toBe('gpt-4.1');
expect(getValueKey('gpt-4.1-turbo')).toBe('gpt-4.1');
expect(getValueKey('gpt-4.1-0125')).toBe('gpt-4.1');
});
it('should return "gpt-4.1-mini" for model type of "gpt-4.1-mini"', () => {
expect(getValueKey('gpt-4.1-mini-preview')).toBe('gpt-4.1-mini');
expect(getValueKey('gpt-4.1-mini-2024-08-06')).toBe('gpt-4.1-mini');
expect(getValueKey('openai/gpt-4.1-mini')).toBe('gpt-4.1-mini');
expect(getValueKey('gpt-4.1-mini-0125')).toBe('gpt-4.1-mini');
});
it('should return "gpt-4.1-nano" for model type of "gpt-4.1-nano"', () => {
expect(getValueKey('gpt-4.1-nano-preview')).toBe('gpt-4.1-nano');
expect(getValueKey('gpt-4.1-nano-2024-08-06')).toBe('gpt-4.1-nano');
expect(getValueKey('openai/gpt-4.1-nano')).toBe('gpt-4.1-nano');
expect(getValueKey('gpt-4.1-nano-0125')).toBe('gpt-4.1-nano');
});
it('should return "gpt-5" for model type of "gpt-5"', () => {
expect(getValueKey('gpt-5-2025-01-30')).toBe('gpt-5');
expect(getValueKey('gpt-5-2025-01-30-0130')).toBe('gpt-5');
expect(getValueKey('openai/gpt-5')).toBe('gpt-5');
expect(getValueKey('openai/gpt-5-2025-01-30')).toBe('gpt-5');
expect(getValueKey('gpt-5-turbo')).toBe('gpt-5');
expect(getValueKey('gpt-5-0130')).toBe('gpt-5');
});
it('should return "gpt-5-mini" for model type of "gpt-5-mini"', () => {
expect(getValueKey('gpt-5-mini-2025-01-30')).toBe('gpt-5-mini');
expect(getValueKey('openai/gpt-5-mini')).toBe('gpt-5-mini');
expect(getValueKey('gpt-5-mini-0130')).toBe('gpt-5-mini');
expect(getValueKey('gpt-5-mini-2025-01-30-0130')).toBe('gpt-5-mini');
});
it('should return "gpt-5-nano" for model type of "gpt-5-nano"', () => {
expect(getValueKey('gpt-5-nano-2025-01-30')).toBe('gpt-5-nano');
expect(getValueKey('openai/gpt-5-nano')).toBe('gpt-5-nano');
expect(getValueKey('gpt-5-nano-0130')).toBe('gpt-5-nano');
expect(getValueKey('gpt-5-nano-2025-01-30-0130')).toBe('gpt-5-nano');
});
🧮 feat: Enhance Model Pricing Coverage and Pattern Matching (#10173) * updated gpt5-pro it is here and on openrouter https://platform.openai.com/docs/models/gpt-5-pro * feat: Add gpt-5-pro pricing - Implemented handling for the new gpt-5-pro model in the getValueKey function. - Updated tests to ensure correct behavior for gpt-5-pro across various scenarios. - Adjusted token limits and multipliers for gpt-5-pro in the tokens utility files. - Enhanced model matching functionality to include gpt-5-pro variations. * refactor: optimize model pricing and validation logic - Added new model pricing entries for llama2, llama3, and qwen variants in tx.js. - Updated tokenValues to include additional models and their pricing structures. - Implemented validation tests in tx.spec.js to ensure all models resolve correctly to pricing. - Refactored getValueKey function to improve model matching and resolution efficiency. - Removed outdated model entries from tokens.ts to streamline pricing management. * fix: add missing pricing * chore: update model pricing for qwen and gemma variants * chore: update model pricing and add validation for context windows - Removed outdated model entries from tx.js and updated tokenValues with new models. - Added a test in tx.spec.js to ensure all models with pricing have corresponding context windows defined in tokens.ts. - Introduced 'command-text' model pricing in tokens.ts to maintain consistency across model definitions. * chore: update model names and pricing for AI21 and Amazon models - Refactored model names in tx.js for AI21 and Amazon models to remove versioning and improve consistency. - Updated pricing values in tokens.ts to reflect the new model names. - Added comprehensive tests in tx.spec.js to validate pricing for both short and full model names across AI21 and Amazon models. * feat: add pricing and validation for Claude Haiku 4.5 model * chore: increase default max context tokens to 18000 for agents * feat: add Qwen3 model pricing and validation tests * chore: reorganize and update Qwen model pricing in tx.js and tokens.ts --------- Co-authored-by: khfung <68192841+khfung@users.noreply.github.com>
2025-10-19 09:23:27 -04:00
it('should return "gpt-5-pro" for model type of "gpt-5-pro"', () => {
expect(getValueKey('gpt-5-pro-2025-01-30')).toBe('gpt-5-pro');
expect(getValueKey('openai/gpt-5-pro')).toBe('gpt-5-pro');
expect(getValueKey('gpt-5-pro-0130')).toBe('gpt-5-pro');
expect(getValueKey('gpt-5-pro-2025-01-30-0130')).toBe('gpt-5-pro');
expect(getValueKey('gpt-5-pro-preview')).toBe('gpt-5-pro');
});
it('should return "gpt-4o" for model type of "gpt-4o"', () => {
expect(getValueKey('gpt-4o-2024-08-06')).toBe('gpt-4o');
expect(getValueKey('gpt-4o-2024-08-06-0718')).toBe('gpt-4o');
expect(getValueKey('openai/gpt-4o')).toBe('gpt-4o');
expect(getValueKey('openai/gpt-4o-2024-08-06')).toBe('gpt-4o');
expect(getValueKey('gpt-4o-turbo')).toBe('gpt-4o');
expect(getValueKey('gpt-4o-0125')).toBe('gpt-4o');
});
it('should return "gpt-4o-mini" for model type of "gpt-4o-mini"', () => {
expect(getValueKey('gpt-4o-mini-2024-07-18')).toBe('gpt-4o-mini');
expect(getValueKey('openai/gpt-4o-mini')).toBe('gpt-4o-mini');
expect(getValueKey('gpt-4o-mini-0718')).toBe('gpt-4o-mini');
expect(getValueKey('gpt-4o-2024-08-06-0718')).not.toBe('gpt-4o-mini');
});
it('should return "gpt-4o-2024-05-13" for model type of "gpt-4o-2024-05-13"', () => {
expect(getValueKey('gpt-4o-2024-05-13')).toBe('gpt-4o-2024-05-13');
expect(getValueKey('openai/gpt-4o-2024-05-13')).toBe('gpt-4o-2024-05-13');
expect(getValueKey('gpt-4o-2024-05-13-0718')).toBe('gpt-4o-2024-05-13');
expect(getValueKey('gpt-4o-2024-05-13-0718')).not.toBe('gpt-4o');
});
it('should return "gpt-4o" for model type of "chatgpt-4o"', () => {
expect(getValueKey('chatgpt-4o-latest')).toBe('gpt-4o');
expect(getValueKey('openai/chatgpt-4o-latest')).toBe('gpt-4o');
expect(getValueKey('chatgpt-4o-latest-0916')).toBe('gpt-4o');
expect(getValueKey('chatgpt-4o-latest-0718')).toBe('gpt-4o');
});
it('should return "claude-3-7-sonnet" for model type of "claude-3-7-sonnet-"', () => {
expect(getValueKey('claude-3-7-sonnet-20240620')).toBe('claude-3-7-sonnet');
expect(getValueKey('anthropic/claude-3-7-sonnet')).toBe('claude-3-7-sonnet');
expect(getValueKey('claude-3-7-sonnet-turbo')).toBe('claude-3-7-sonnet');
expect(getValueKey('claude-3-7-sonnet-0125')).toBe('claude-3-7-sonnet');
});
it('should return "claude-3.7-sonnet" for model type of "claude-3.7-sonnet-"', () => {
expect(getValueKey('claude-3.7-sonnet-20240620')).toBe('claude-3.7-sonnet');
expect(getValueKey('anthropic/claude-3.7-sonnet')).toBe('claude-3.7-sonnet');
expect(getValueKey('claude-3.7-sonnet-turbo')).toBe('claude-3.7-sonnet');
expect(getValueKey('claude-3.7-sonnet-0125')).toBe('claude-3.7-sonnet');
});
it('should return "claude-3-5-sonnet" for model type of "claude-3-5-sonnet-"', () => {
expect(getValueKey('claude-3-5-sonnet-20240620')).toBe('claude-3-5-sonnet');
expect(getValueKey('anthropic/claude-3-5-sonnet')).toBe('claude-3-5-sonnet');
expect(getValueKey('claude-3-5-sonnet-turbo')).toBe('claude-3-5-sonnet');
expect(getValueKey('claude-3-5-sonnet-0125')).toBe('claude-3-5-sonnet');
});
it('should return "claude-3.5-sonnet" for model type of "claude-3.5-sonnet-"', () => {
expect(getValueKey('claude-3.5-sonnet-20240620')).toBe('claude-3.5-sonnet');
expect(getValueKey('anthropic/claude-3.5-sonnet')).toBe('claude-3.5-sonnet');
expect(getValueKey('claude-3.5-sonnet-turbo')).toBe('claude-3.5-sonnet');
expect(getValueKey('claude-3.5-sonnet-0125')).toBe('claude-3.5-sonnet');
});
2024-11-04 15:10:24 -05:00
it('should return "claude-3-5-haiku" for model type of "claude-3-5-haiku-"', () => {
expect(getValueKey('claude-3-5-haiku-20240620')).toBe('claude-3-5-haiku');
expect(getValueKey('anthropic/claude-3-5-haiku')).toBe('claude-3-5-haiku');
expect(getValueKey('claude-3-5-haiku-turbo')).toBe('claude-3-5-haiku');
expect(getValueKey('claude-3-5-haiku-0125')).toBe('claude-3-5-haiku');
});
it('should return "claude-3.5-haiku" for model type of "claude-3.5-haiku-"', () => {
expect(getValueKey('claude-3.5-haiku-20240620')).toBe('claude-3.5-haiku');
expect(getValueKey('anthropic/claude-3.5-haiku')).toBe('claude-3.5-haiku');
expect(getValueKey('claude-3.5-haiku-turbo')).toBe('claude-3.5-haiku');
expect(getValueKey('claude-3.5-haiku-0125')).toBe('claude-3.5-haiku');
});
it('should return expected value keys for "gpt-oss" models', () => {
expect(getValueKey('openai/gpt-oss-120b')).toBe('gpt-oss-120b');
expect(getValueKey('openai/gpt-oss:120b')).toBe('gpt-oss:120b');
expect(getValueKey('openai/gpt-oss-570b')).toBe('gpt-oss');
expect(getValueKey('gpt-oss-570b')).toBe('gpt-oss');
expect(getValueKey('groq/gpt-oss-1080b')).toBe('gpt-oss');
expect(getValueKey('gpt-oss-20b')).toBe('gpt-oss-20b');
expect(getValueKey('oai/gpt-oss:20b')).toBe('gpt-oss:20b');
});
feat: Accurate Token Usage Tracking & Optional Balance (#1018) * refactor(Chains/llms): allow passing callbacks * refactor(BaseClient): accurately count completion tokens as generation only * refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM * wip: summary prompt tokens * refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end wip: draft out relevant providers and variables for token tracing * refactor(createLLM): make streaming prop false by default * chore: remove use of getTokenCountForResponse * refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace * chore: remove passing of streaming prop, also console log useful vars for tracing * feat: formatFromLangChain helper function to count tokens for ChatModelStart * refactor(initializeLLM): add role for LLM tracing * chore(formatFromLangChain): update JSDoc * feat(formatMessages): formats langChain messages into OpenAI payload format * chore: install openai-chat-tokens * refactor(formatMessage): optimize conditional langChain logic fix(formatFromLangChain): fix destructuring * feat: accurate prompt tokens for ChatModelStart before generation * refactor(handleChatModelStart): move to callbacks dir, use factory function * refactor(initializeLLM): rename 'role' to 'context' * feat(Balance/Transaction): new schema/models for tracking token spend refactor(Key): factor out model export to separate file * refactor(initializeClient): add req,res objects to client options * feat: add-balance script to add to an existing users' token balance refactor(Transaction): use multiplier map/function, return balance update * refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match * refactor(Tx): add fair fallback value multiplier incase the config result is undefined * refactor(Balance): rename 'tokens' to 'tokenCredits' * feat: balance check, add tx.js for new tx-related methods and tests * chore(summaryPrompts): update prompt token count * refactor(callbacks): pass req, res wip: check balance * refactor(Tx): make convoId a String type, fix(calculateTokenValue) * refactor(BaseClient): add conversationId as client prop when assigned * feat(RunManager): track LLM runs with manager, track token spend from LLM, refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls * feat(spendTokens): helper to spend prompt/completion tokens * feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds refactor(Balance): static check method to return object instead of boolean now wip(OpenAIClient): implement use of checkBalance * refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large refactor(OpenAIClient): add checkBalance refactor(createStartHandler): add checkBalance * chore: remove prompt and completion token logging from route handler * chore(spendTokens): add JSDoc * feat(logTokenCost): record transactions for basic api calls * chore(ask/edit): invoke getResponseSender only once per API call * refactor(ask/edit): pass promptTokens to getIds and include in abort data * refactor(getIds -> getReqData): rename function * refactor(Tx): increase value if incomplete message * feat: record tokenUsage when message is aborted * refactor: subtract tokens when payload includes function_call * refactor: add namespace for token_balance * fix(spendTokens): only execute if corresponding token type amounts are defined * refactor(checkBalance): throws Error if not enough token credits * refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run' * fix(abortMiddleware): circular dependency, and default to empty string for completionTokens * fix: properly cancel title requests when there isn't enough tokens to generate * feat(predictNewSummary): custom chain for summaries to allow signal passing refactor(summaryBuffer): use new custom chain * feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError * refactor(createStartHandler): if summary, add error details to runs * fix(OpenAIClient): support aborting from summarization & showing error to user refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer * refactor(logTokenCost -> recordTokenUsage): rename * refactor(checkBalance): include promptTokens in errorMessage * refactor(checkBalance/spendTokens): move to models dir * fix(createLanguageChain): correctly pass config * refactor(initializeLLM/title): add tokenBuffer of 150 for balance check * refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called * refactor(createStartHandler): add error to run if context is plugins as well * refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run * refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic * chore: use absolute equality for addTitle condition * refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional * style: icon changes to match official * fix(BaseClient): getTokenCountForResponse -> getTokenCount * fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc * refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled * fix(e2e/cleanUp): cleanup new collections, import all model methods from index * fix(config/add-balance): add uncaughtException listener * fix: circular dependency * refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance * fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped * fix(createStartHandler): correct condition for generations * chore: bump postcss due to moderate severity vulnerability * chore: bump zod due to low severity vulnerability * chore: bump openai & data-provider version * feat(types): OpenAI Message types * chore: update bun lockfile * refactor(CodeBlock): add error block formatting * refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON * chore(logViolation): delete user_id after error is logged * refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex * fix(DALL-E): use latest openai SDK * chore: reorganize imports, fix type issue * feat(server): add balance route * fix(api/models): add auth * feat(data-provider): /api/balance query * feat: show balance if checking is enabled, refetch on final message or error * chore: update docs, .env.example with token_usage info, add balance script command * fix(Balance): fallback to empty obj for balance query * style: slight adjustment of balance element * docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
});
describe('getMultiplier', () => {
it('should return the correct multiplier for a given valueKey and tokenType', () => {
expect(getMultiplier({ valueKey: '8k', tokenType: 'prompt' })).toBe(tokenValues['8k'].prompt);
expect(getMultiplier({ valueKey: '8k', tokenType: 'completion' })).toBe(
tokenValues['8k'].completion,
);
feat: Accurate Token Usage Tracking & Optional Balance (#1018) * refactor(Chains/llms): allow passing callbacks * refactor(BaseClient): accurately count completion tokens as generation only * refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM * wip: summary prompt tokens * refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end wip: draft out relevant providers and variables for token tracing * refactor(createLLM): make streaming prop false by default * chore: remove use of getTokenCountForResponse * refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace * chore: remove passing of streaming prop, also console log useful vars for tracing * feat: formatFromLangChain helper function to count tokens for ChatModelStart * refactor(initializeLLM): add role for LLM tracing * chore(formatFromLangChain): update JSDoc * feat(formatMessages): formats langChain messages into OpenAI payload format * chore: install openai-chat-tokens * refactor(formatMessage): optimize conditional langChain logic fix(formatFromLangChain): fix destructuring * feat: accurate prompt tokens for ChatModelStart before generation * refactor(handleChatModelStart): move to callbacks dir, use factory function * refactor(initializeLLM): rename 'role' to 'context' * feat(Balance/Transaction): new schema/models for tracking token spend refactor(Key): factor out model export to separate file * refactor(initializeClient): add req,res objects to client options * feat: add-balance script to add to an existing users' token balance refactor(Transaction): use multiplier map/function, return balance update * refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match * refactor(Tx): add fair fallback value multiplier incase the config result is undefined * refactor(Balance): rename 'tokens' to 'tokenCredits' * feat: balance check, add tx.js for new tx-related methods and tests * chore(summaryPrompts): update prompt token count * refactor(callbacks): pass req, res wip: check balance * refactor(Tx): make convoId a String type, fix(calculateTokenValue) * refactor(BaseClient): add conversationId as client prop when assigned * feat(RunManager): track LLM runs with manager, track token spend from LLM, refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls * feat(spendTokens): helper to spend prompt/completion tokens * feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds refactor(Balance): static check method to return object instead of boolean now wip(OpenAIClient): implement use of checkBalance * refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large refactor(OpenAIClient): add checkBalance refactor(createStartHandler): add checkBalance * chore: remove prompt and completion token logging from route handler * chore(spendTokens): add JSDoc * feat(logTokenCost): record transactions for basic api calls * chore(ask/edit): invoke getResponseSender only once per API call * refactor(ask/edit): pass promptTokens to getIds and include in abort data * refactor(getIds -> getReqData): rename function * refactor(Tx): increase value if incomplete message * feat: record tokenUsage when message is aborted * refactor: subtract tokens when payload includes function_call * refactor: add namespace for token_balance * fix(spendTokens): only execute if corresponding token type amounts are defined * refactor(checkBalance): throws Error if not enough token credits * refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run' * fix(abortMiddleware): circular dependency, and default to empty string for completionTokens * fix: properly cancel title requests when there isn't enough tokens to generate * feat(predictNewSummary): custom chain for summaries to allow signal passing refactor(summaryBuffer): use new custom chain * feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError * refactor(createStartHandler): if summary, add error details to runs * fix(OpenAIClient): support aborting from summarization & showing error to user refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer * refactor(logTokenCost -> recordTokenUsage): rename * refactor(checkBalance): include promptTokens in errorMessage * refactor(checkBalance/spendTokens): move to models dir * fix(createLanguageChain): correctly pass config * refactor(initializeLLM/title): add tokenBuffer of 150 for balance check * refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called * refactor(createStartHandler): add error to run if context is plugins as well * refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run * refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic * chore: use absolute equality for addTitle condition * refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional * style: icon changes to match official * fix(BaseClient): getTokenCountForResponse -> getTokenCount * fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc * refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled * fix(e2e/cleanUp): cleanup new collections, import all model methods from index * fix(config/add-balance): add uncaughtException listener * fix: circular dependency * refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance * fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped * fix(createStartHandler): correct condition for generations * chore: bump postcss due to moderate severity vulnerability * chore: bump zod due to low severity vulnerability * chore: bump openai & data-provider version * feat(types): OpenAI Message types * chore: update bun lockfile * refactor(CodeBlock): add error block formatting * refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON * chore(logViolation): delete user_id after error is logged * refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex * fix(DALL-E): use latest openai SDK * chore: reorganize imports, fix type issue * feat(server): add balance route * fix(api/models): add auth * feat(data-provider): /api/balance query * feat: show balance if checking is enabled, refetch on final message or error * chore: update docs, .env.example with token_usage info, add balance script command * fix(Balance): fallback to empty obj for balance query * style: slight adjustment of balance element * docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
});
it('should return correct multipliers for o4-mini and o3', () => {
['o4-mini', 'o3'].forEach((model) => {
const prompt = getMultiplier({ model, tokenType: 'prompt' });
const completion = getMultiplier({ model, tokenType: 'completion' });
expect(prompt).toBe(tokenValues[model].prompt);
expect(completion).toBe(tokenValues[model].completion);
});
});
it('should return defaultRate if tokenType is provided but not found in tokenValues', () => {
expect(getMultiplier({ valueKey: '8k', tokenType: 'unknownType' })).toBe(defaultRate);
feat: Accurate Token Usage Tracking & Optional Balance (#1018) * refactor(Chains/llms): allow passing callbacks * refactor(BaseClient): accurately count completion tokens as generation only * refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM * wip: summary prompt tokens * refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end wip: draft out relevant providers and variables for token tracing * refactor(createLLM): make streaming prop false by default * chore: remove use of getTokenCountForResponse * refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace * chore: remove passing of streaming prop, also console log useful vars for tracing * feat: formatFromLangChain helper function to count tokens for ChatModelStart * refactor(initializeLLM): add role for LLM tracing * chore(formatFromLangChain): update JSDoc * feat(formatMessages): formats langChain messages into OpenAI payload format * chore: install openai-chat-tokens * refactor(formatMessage): optimize conditional langChain logic fix(formatFromLangChain): fix destructuring * feat: accurate prompt tokens for ChatModelStart before generation * refactor(handleChatModelStart): move to callbacks dir, use factory function * refactor(initializeLLM): rename 'role' to 'context' * feat(Balance/Transaction): new schema/models for tracking token spend refactor(Key): factor out model export to separate file * refactor(initializeClient): add req,res objects to client options * feat: add-balance script to add to an existing users' token balance refactor(Transaction): use multiplier map/function, return balance update * refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match * refactor(Tx): add fair fallback value multiplier incase the config result is undefined * refactor(Balance): rename 'tokens' to 'tokenCredits' * feat: balance check, add tx.js for new tx-related methods and tests * chore(summaryPrompts): update prompt token count * refactor(callbacks): pass req, res wip: check balance * refactor(Tx): make convoId a String type, fix(calculateTokenValue) * refactor(BaseClient): add conversationId as client prop when assigned * feat(RunManager): track LLM runs with manager, track token spend from LLM, refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls * feat(spendTokens): helper to spend prompt/completion tokens * feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds refactor(Balance): static check method to return object instead of boolean now wip(OpenAIClient): implement use of checkBalance * refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large refactor(OpenAIClient): add checkBalance refactor(createStartHandler): add checkBalance * chore: remove prompt and completion token logging from route handler * chore(spendTokens): add JSDoc * feat(logTokenCost): record transactions for basic api calls * chore(ask/edit): invoke getResponseSender only once per API call * refactor(ask/edit): pass promptTokens to getIds and include in abort data * refactor(getIds -> getReqData): rename function * refactor(Tx): increase value if incomplete message * feat: record tokenUsage when message is aborted * refactor: subtract tokens when payload includes function_call * refactor: add namespace for token_balance * fix(spendTokens): only execute if corresponding token type amounts are defined * refactor(checkBalance): throws Error if not enough token credits * refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run' * fix(abortMiddleware): circular dependency, and default to empty string for completionTokens * fix: properly cancel title requests when there isn't enough tokens to generate * feat(predictNewSummary): custom chain for summaries to allow signal passing refactor(summaryBuffer): use new custom chain * feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError * refactor(createStartHandler): if summary, add error details to runs * fix(OpenAIClient): support aborting from summarization & showing error to user refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer * refactor(logTokenCost -> recordTokenUsage): rename * refactor(checkBalance): include promptTokens in errorMessage * refactor(checkBalance/spendTokens): move to models dir * fix(createLanguageChain): correctly pass config * refactor(initializeLLM/title): add tokenBuffer of 150 for balance check * refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called * refactor(createStartHandler): add error to run if context is plugins as well * refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run * refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic * chore: use absolute equality for addTitle condition * refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional * style: icon changes to match official * fix(BaseClient): getTokenCountForResponse -> getTokenCount * fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc * refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled * fix(e2e/cleanUp): cleanup new collections, import all model methods from index * fix(config/add-balance): add uncaughtException listener * fix: circular dependency * refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance * fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped * fix(createStartHandler): correct condition for generations * chore: bump postcss due to moderate severity vulnerability * chore: bump zod due to low severity vulnerability * chore: bump openai & data-provider version * feat(types): OpenAI Message types * chore: update bun lockfile * refactor(CodeBlock): add error block formatting * refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON * chore(logViolation): delete user_id after error is logged * refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex * fix(DALL-E): use latest openai SDK * chore: reorganize imports, fix type issue * feat(server): add balance route * fix(api/models): add auth * feat(data-provider): /api/balance query * feat: show balance if checking is enabled, refetch on final message or error * chore: update docs, .env.example with token_usage info, add balance script command * fix(Balance): fallback to empty obj for balance query * style: slight adjustment of balance element * docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
});
it('should derive the valueKey from the model if not provided', () => {
expect(getMultiplier({ tokenType: 'prompt', model: 'gpt-4-some-other-info' })).toBe(
tokenValues['8k'].prompt,
);
feat: Accurate Token Usage Tracking & Optional Balance (#1018) * refactor(Chains/llms): allow passing callbacks * refactor(BaseClient): accurately count completion tokens as generation only * refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM * wip: summary prompt tokens * refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end wip: draft out relevant providers and variables for token tracing * refactor(createLLM): make streaming prop false by default * chore: remove use of getTokenCountForResponse * refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace * chore: remove passing of streaming prop, also console log useful vars for tracing * feat: formatFromLangChain helper function to count tokens for ChatModelStart * refactor(initializeLLM): add role for LLM tracing * chore(formatFromLangChain): update JSDoc * feat(formatMessages): formats langChain messages into OpenAI payload format * chore: install openai-chat-tokens * refactor(formatMessage): optimize conditional langChain logic fix(formatFromLangChain): fix destructuring * feat: accurate prompt tokens for ChatModelStart before generation * refactor(handleChatModelStart): move to callbacks dir, use factory function * refactor(initializeLLM): rename 'role' to 'context' * feat(Balance/Transaction): new schema/models for tracking token spend refactor(Key): factor out model export to separate file * refactor(initializeClient): add req,res objects to client options * feat: add-balance script to add to an existing users' token balance refactor(Transaction): use multiplier map/function, return balance update * refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match * refactor(Tx): add fair fallback value multiplier incase the config result is undefined * refactor(Balance): rename 'tokens' to 'tokenCredits' * feat: balance check, add tx.js for new tx-related methods and tests * chore(summaryPrompts): update prompt token count * refactor(callbacks): pass req, res wip: check balance * refactor(Tx): make convoId a String type, fix(calculateTokenValue) * refactor(BaseClient): add conversationId as client prop when assigned * feat(RunManager): track LLM runs with manager, track token spend from LLM, refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls * feat(spendTokens): helper to spend prompt/completion tokens * feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds refactor(Balance): static check method to return object instead of boolean now wip(OpenAIClient): implement use of checkBalance * refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large refactor(OpenAIClient): add checkBalance refactor(createStartHandler): add checkBalance * chore: remove prompt and completion token logging from route handler * chore(spendTokens): add JSDoc * feat(logTokenCost): record transactions for basic api calls * chore(ask/edit): invoke getResponseSender only once per API call * refactor(ask/edit): pass promptTokens to getIds and include in abort data * refactor(getIds -> getReqData): rename function * refactor(Tx): increase value if incomplete message * feat: record tokenUsage when message is aborted * refactor: subtract tokens when payload includes function_call * refactor: add namespace for token_balance * fix(spendTokens): only execute if corresponding token type amounts are defined * refactor(checkBalance): throws Error if not enough token credits * refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run' * fix(abortMiddleware): circular dependency, and default to empty string for completionTokens * fix: properly cancel title requests when there isn't enough tokens to generate * feat(predictNewSummary): custom chain for summaries to allow signal passing refactor(summaryBuffer): use new custom chain * feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError * refactor(createStartHandler): if summary, add error details to runs * fix(OpenAIClient): support aborting from summarization & showing error to user refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer * refactor(logTokenCost -> recordTokenUsage): rename * refactor(checkBalance): include promptTokens in errorMessage * refactor(checkBalance/spendTokens): move to models dir * fix(createLanguageChain): correctly pass config * refactor(initializeLLM/title): add tokenBuffer of 150 for balance check * refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called * refactor(createStartHandler): add error to run if context is plugins as well * refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run * refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic * chore: use absolute equality for addTitle condition * refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional * style: icon changes to match official * fix(BaseClient): getTokenCountForResponse -> getTokenCount * fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc * refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled * fix(e2e/cleanUp): cleanup new collections, import all model methods from index * fix(config/add-balance): add uncaughtException listener * fix: circular dependency * refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance * fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped * fix(createStartHandler): correct condition for generations * chore: bump postcss due to moderate severity vulnerability * chore: bump zod due to low severity vulnerability * chore: bump openai & data-provider version * feat(types): OpenAI Message types * chore: update bun lockfile * refactor(CodeBlock): add error block formatting * refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON * chore(logViolation): delete user_id after error is logged * refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex * fix(DALL-E): use latest openai SDK * chore: reorganize imports, fix type issue * feat(server): add balance route * fix(api/models): add auth * feat(data-provider): /api/balance query * feat: show balance if checking is enabled, refetch on final message or error * chore: update docs, .env.example with token_usage info, add balance script command * fix(Balance): fallback to empty obj for balance query * style: slight adjustment of balance element * docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
});
it('should return 1 if only model or tokenType is missing', () => {
expect(getMultiplier({ tokenType: 'prompt' })).toBe(1);
expect(getMultiplier({ model: 'gpt-4-some-other-info' })).toBe(1);
});
it('should return the correct multiplier for gpt-3.5-turbo-1106', () => {
expect(getMultiplier({ valueKey: 'gpt-3.5-turbo-1106', tokenType: 'prompt' })).toBe(
tokenValues['gpt-3.5-turbo-1106'].prompt,
);
expect(getMultiplier({ valueKey: 'gpt-3.5-turbo-1106', tokenType: 'completion' })).toBe(
tokenValues['gpt-3.5-turbo-1106'].completion,
);
});
it('should return the correct multiplier for gpt-4-1106', () => {
expect(getMultiplier({ valueKey: 'gpt-4-1106', tokenType: 'prompt' })).toBe(
tokenValues['gpt-4-1106'].prompt,
);
expect(getMultiplier({ valueKey: 'gpt-4-1106', tokenType: 'completion' })).toBe(
tokenValues['gpt-4-1106'].completion,
);
});
it('should return the correct multiplier for gpt-5', () => {
const valueKey = getValueKey('gpt-5-2025-01-30');
expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(tokenValues['gpt-5'].prompt);
expect(getMultiplier({ valueKey, tokenType: 'completion' })).toBe(
tokenValues['gpt-5'].completion,
);
expect(getMultiplier({ model: 'gpt-5-preview', tokenType: 'prompt' })).toBe(
tokenValues['gpt-5'].prompt,
);
expect(getMultiplier({ model: 'openai/gpt-5', tokenType: 'completion' })).toBe(
tokenValues['gpt-5'].completion,
);
});
it('should return the correct multiplier for gpt-5-mini', () => {
const valueKey = getValueKey('gpt-5-mini-2025-01-30');
expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(tokenValues['gpt-5-mini'].prompt);
expect(getMultiplier({ valueKey, tokenType: 'completion' })).toBe(
tokenValues['gpt-5-mini'].completion,
);
expect(getMultiplier({ model: 'gpt-5-mini-preview', tokenType: 'prompt' })).toBe(
tokenValues['gpt-5-mini'].prompt,
);
expect(getMultiplier({ model: 'openai/gpt-5-mini', tokenType: 'completion' })).toBe(
tokenValues['gpt-5-mini'].completion,
);
});
it('should return the correct multiplier for gpt-5-nano', () => {
const valueKey = getValueKey('gpt-5-nano-2025-01-30');
expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(tokenValues['gpt-5-nano'].prompt);
expect(getMultiplier({ valueKey, tokenType: 'completion' })).toBe(
tokenValues['gpt-5-nano'].completion,
);
expect(getMultiplier({ model: 'gpt-5-nano-preview', tokenType: 'prompt' })).toBe(
tokenValues['gpt-5-nano'].prompt,
);
expect(getMultiplier({ model: 'openai/gpt-5-nano', tokenType: 'completion' })).toBe(
tokenValues['gpt-5-nano'].completion,
);
});
🧮 feat: Enhance Model Pricing Coverage and Pattern Matching (#10173) * updated gpt5-pro it is here and on openrouter https://platform.openai.com/docs/models/gpt-5-pro * feat: Add gpt-5-pro pricing - Implemented handling for the new gpt-5-pro model in the getValueKey function. - Updated tests to ensure correct behavior for gpt-5-pro across various scenarios. - Adjusted token limits and multipliers for gpt-5-pro in the tokens utility files. - Enhanced model matching functionality to include gpt-5-pro variations. * refactor: optimize model pricing and validation logic - Added new model pricing entries for llama2, llama3, and qwen variants in tx.js. - Updated tokenValues to include additional models and their pricing structures. - Implemented validation tests in tx.spec.js to ensure all models resolve correctly to pricing. - Refactored getValueKey function to improve model matching and resolution efficiency. - Removed outdated model entries from tokens.ts to streamline pricing management. * fix: add missing pricing * chore: update model pricing for qwen and gemma variants * chore: update model pricing and add validation for context windows - Removed outdated model entries from tx.js and updated tokenValues with new models. - Added a test in tx.spec.js to ensure all models with pricing have corresponding context windows defined in tokens.ts. - Introduced 'command-text' model pricing in tokens.ts to maintain consistency across model definitions. * chore: update model names and pricing for AI21 and Amazon models - Refactored model names in tx.js for AI21 and Amazon models to remove versioning and improve consistency. - Updated pricing values in tokens.ts to reflect the new model names. - Added comprehensive tests in tx.spec.js to validate pricing for both short and full model names across AI21 and Amazon models. * feat: add pricing and validation for Claude Haiku 4.5 model * chore: increase default max context tokens to 18000 for agents * feat: add Qwen3 model pricing and validation tests * chore: reorganize and update Qwen model pricing in tx.js and tokens.ts --------- Co-authored-by: khfung <68192841+khfung@users.noreply.github.com>
2025-10-19 09:23:27 -04:00
it('should return the correct multiplier for gpt-5-pro', () => {
const valueKey = getValueKey('gpt-5-pro-2025-01-30');
expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(tokenValues['gpt-5-pro'].prompt);
expect(getMultiplier({ valueKey, tokenType: 'completion' })).toBe(
tokenValues['gpt-5-pro'].completion,
);
expect(getMultiplier({ model: 'gpt-5-pro-preview', tokenType: 'prompt' })).toBe(
tokenValues['gpt-5-pro'].prompt,
);
expect(getMultiplier({ model: 'openai/gpt-5-pro', tokenType: 'completion' })).toBe(
tokenValues['gpt-5-pro'].completion,
);
});
it('should return the correct multiplier for gpt-5.1', () => {
expect(getMultiplier({ model: 'gpt-5.1', tokenType: 'prompt' })).toBe(
tokenValues['gpt-5.1'].prompt,
);
expect(getMultiplier({ model: 'gpt-5.1', tokenType: 'completion' })).toBe(
tokenValues['gpt-5.1'].completion,
);
expect(getMultiplier({ model: 'openai/gpt-5.1', tokenType: 'prompt' })).toBe(
tokenValues['gpt-5.1'].prompt,
);
expect(tokenValues['gpt-5.1'].prompt).toBe(1.25);
expect(tokenValues['gpt-5.1'].completion).toBe(10);
});
it('should return the correct multiplier for gpt-5.2', () => {
expect(getMultiplier({ model: 'gpt-5.2', tokenType: 'prompt' })).toBe(
tokenValues['gpt-5.2'].prompt,
);
expect(getMultiplier({ model: 'gpt-5.2', tokenType: 'completion' })).toBe(
tokenValues['gpt-5.2'].completion,
);
expect(getMultiplier({ model: 'openai/gpt-5.2', tokenType: 'prompt' })).toBe(
tokenValues['gpt-5.2'].prompt,
);
expect(tokenValues['gpt-5.2'].prompt).toBe(1.75);
expect(tokenValues['gpt-5.2'].completion).toBe(14);
});
it('should return the correct multiplier for gpt-4o', () => {
const valueKey = getValueKey('gpt-4o-2024-08-06');
expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(tokenValues['gpt-4o'].prompt);
expect(getMultiplier({ valueKey, tokenType: 'completion' })).toBe(
tokenValues['gpt-4o'].completion,
);
expect(getMultiplier({ valueKey, tokenType: 'completion' })).not.toBe(
tokenValues['gpt-4-1106'].completion,
);
});
it('should return the correct multiplier for gpt-4.1', () => {
const valueKey = getValueKey('gpt-4.1-2024-08-06');
expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(tokenValues['gpt-4.1'].prompt);
expect(getMultiplier({ valueKey, tokenType: 'completion' })).toBe(
tokenValues['gpt-4.1'].completion,
);
expect(getMultiplier({ model: 'gpt-4.1-preview', tokenType: 'prompt' })).toBe(
tokenValues['gpt-4.1'].prompt,
);
expect(getMultiplier({ model: 'openai/gpt-4.1', tokenType: 'completion' })).toBe(
tokenValues['gpt-4.1'].completion,
);
});
it('should return the correct multiplier for gpt-4.1-mini', () => {
const valueKey = getValueKey('gpt-4.1-mini-2024-08-06');
expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(
tokenValues['gpt-4.1-mini'].prompt,
);
expect(getMultiplier({ valueKey, tokenType: 'completion' })).toBe(
tokenValues['gpt-4.1-mini'].completion,
);
expect(getMultiplier({ model: 'gpt-4.1-mini-preview', tokenType: 'prompt' })).toBe(
tokenValues['gpt-4.1-mini'].prompt,
);
expect(getMultiplier({ model: 'openai/gpt-4.1-mini', tokenType: 'completion' })).toBe(
tokenValues['gpt-4.1-mini'].completion,
);
});
it('should return the correct multiplier for gpt-4.1-nano', () => {
const valueKey = getValueKey('gpt-4.1-nano-2024-08-06');
expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(
tokenValues['gpt-4.1-nano'].prompt,
);
expect(getMultiplier({ valueKey, tokenType: 'completion' })).toBe(
tokenValues['gpt-4.1-nano'].completion,
);
expect(getMultiplier({ model: 'gpt-4.1-nano-preview', tokenType: 'prompt' })).toBe(
tokenValues['gpt-4.1-nano'].prompt,
);
expect(getMultiplier({ model: 'openai/gpt-4.1-nano', tokenType: 'completion' })).toBe(
tokenValues['gpt-4.1-nano'].completion,
);
});
it('should return the correct multiplier for gpt-4o-mini', () => {
const valueKey = getValueKey('gpt-4o-mini-2024-07-18');
expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(
tokenValues['gpt-4o-mini'].prompt,
);
expect(getMultiplier({ valueKey, tokenType: 'completion' })).toBe(
tokenValues['gpt-4o-mini'].completion,
);
expect(getMultiplier({ valueKey, tokenType: 'completion' })).not.toBe(
tokenValues['gpt-4-1106'].completion,
);
});
it('should return the correct multiplier for chatgpt-4o-latest', () => {
const valueKey = getValueKey('chatgpt-4o-latest');
expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(tokenValues['gpt-4o'].prompt);
expect(getMultiplier({ valueKey, tokenType: 'completion' })).toBe(
tokenValues['gpt-4o'].completion,
);
expect(getMultiplier({ valueKey, tokenType: 'completion' })).not.toBe(
tokenValues['gpt-4o-mini'].completion,
);
});
it('should derive the valueKey from the model if not provided for new models', () => {
expect(
getMultiplier({ tokenType: 'prompt', model: 'gpt-3.5-turbo-1106-some-other-info' }),
).toBe(tokenValues['gpt-3.5-turbo-1106'].prompt);
expect(getMultiplier({ tokenType: 'completion', model: 'gpt-4-1106-vision-preview' })).toBe(
tokenValues['gpt-4-1106'].completion,
);
expect(getMultiplier({ tokenType: 'completion', model: 'gpt-4-0125-preview' })).toBe(
tokenValues['gpt-4-1106'].completion,
);
expect(getMultiplier({ tokenType: 'completion', model: 'gpt-4-turbo-vision-preview' })).toBe(
tokenValues['gpt-4-1106'].completion,
);
expect(getMultiplier({ tokenType: 'completion', model: 'gpt-3.5-turbo-0125' })).toBe(
tokenValues['gpt-3.5-turbo-0125'].completion,
);
});
it('should return defaultRate if derived valueKey does not match any known patterns', () => {
expect(getMultiplier({ tokenType: 'prompt', model: 'gpt-10-some-other-info' })).toBe(
defaultRate,
);
feat: Accurate Token Usage Tracking & Optional Balance (#1018) * refactor(Chains/llms): allow passing callbacks * refactor(BaseClient): accurately count completion tokens as generation only * refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM * wip: summary prompt tokens * refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end wip: draft out relevant providers and variables for token tracing * refactor(createLLM): make streaming prop false by default * chore: remove use of getTokenCountForResponse * refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace * chore: remove passing of streaming prop, also console log useful vars for tracing * feat: formatFromLangChain helper function to count tokens for ChatModelStart * refactor(initializeLLM): add role for LLM tracing * chore(formatFromLangChain): update JSDoc * feat(formatMessages): formats langChain messages into OpenAI payload format * chore: install openai-chat-tokens * refactor(formatMessage): optimize conditional langChain logic fix(formatFromLangChain): fix destructuring * feat: accurate prompt tokens for ChatModelStart before generation * refactor(handleChatModelStart): move to callbacks dir, use factory function * refactor(initializeLLM): rename 'role' to 'context' * feat(Balance/Transaction): new schema/models for tracking token spend refactor(Key): factor out model export to separate file * refactor(initializeClient): add req,res objects to client options * feat: add-balance script to add to an existing users' token balance refactor(Transaction): use multiplier map/function, return balance update * refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match * refactor(Tx): add fair fallback value multiplier incase the config result is undefined * refactor(Balance): rename 'tokens' to 'tokenCredits' * feat: balance check, add tx.js for new tx-related methods and tests * chore(summaryPrompts): update prompt token count * refactor(callbacks): pass req, res wip: check balance * refactor(Tx): make convoId a String type, fix(calculateTokenValue) * refactor(BaseClient): add conversationId as client prop when assigned * feat(RunManager): track LLM runs with manager, track token spend from LLM, refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls * feat(spendTokens): helper to spend prompt/completion tokens * feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds refactor(Balance): static check method to return object instead of boolean now wip(OpenAIClient): implement use of checkBalance * refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large refactor(OpenAIClient): add checkBalance refactor(createStartHandler): add checkBalance * chore: remove prompt and completion token logging from route handler * chore(spendTokens): add JSDoc * feat(logTokenCost): record transactions for basic api calls * chore(ask/edit): invoke getResponseSender only once per API call * refactor(ask/edit): pass promptTokens to getIds and include in abort data * refactor(getIds -> getReqData): rename function * refactor(Tx): increase value if incomplete message * feat: record tokenUsage when message is aborted * refactor: subtract tokens when payload includes function_call * refactor: add namespace for token_balance * fix(spendTokens): only execute if corresponding token type amounts are defined * refactor(checkBalance): throws Error if not enough token credits * refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run' * fix(abortMiddleware): circular dependency, and default to empty string for completionTokens * fix: properly cancel title requests when there isn't enough tokens to generate * feat(predictNewSummary): custom chain for summaries to allow signal passing refactor(summaryBuffer): use new custom chain * feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError * refactor(createStartHandler): if summary, add error details to runs * fix(OpenAIClient): support aborting from summarization & showing error to user refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer * refactor(logTokenCost -> recordTokenUsage): rename * refactor(checkBalance): include promptTokens in errorMessage * refactor(checkBalance/spendTokens): move to models dir * fix(createLanguageChain): correctly pass config * refactor(initializeLLM/title): add tokenBuffer of 150 for balance check * refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called * refactor(createStartHandler): add error to run if context is plugins as well * refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run * refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic * chore: use absolute equality for addTitle condition * refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional * style: icon changes to match official * fix(BaseClient): getTokenCountForResponse -> getTokenCount * fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc * refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled * fix(e2e/cleanUp): cleanup new collections, import all model methods from index * fix(config/add-balance): add uncaughtException listener * fix: circular dependency * refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance * fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped * fix(createStartHandler): correct condition for generations * chore: bump postcss due to moderate severity vulnerability * chore: bump zod due to low severity vulnerability * chore: bump openai & data-provider version * feat(types): OpenAI Message types * chore: update bun lockfile * refactor(CodeBlock): add error block formatting * refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON * chore(logViolation): delete user_id after error is logged * refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex * fix(DALL-E): use latest openai SDK * chore: reorganize imports, fix type issue * feat(server): add balance route * fix(api/models): add auth * feat(data-provider): /api/balance query * feat: show balance if checking is enabled, refetch on final message or error * chore: update docs, .env.example with token_usage info, add balance script command * fix(Balance): fallback to empty obj for balance query * style: slight adjustment of balance element * docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
});
it('should return correct multipliers for GPT-OSS models', () => {
const models = ['gpt-oss-20b', 'gpt-oss-120b'];
models.forEach((key) => {
const expectedPrompt = tokenValues[key].prompt;
const expectedCompletion = tokenValues[key].completion;
expect(getMultiplier({ valueKey: key, tokenType: 'prompt' })).toBe(expectedPrompt);
expect(getMultiplier({ valueKey: key, tokenType: 'completion' })).toBe(expectedCompletion);
expect(getMultiplier({ model: key, tokenType: 'prompt' })).toBe(expectedPrompt);
expect(getMultiplier({ model: key, tokenType: 'completion' })).toBe(expectedCompletion);
});
});
it('should return correct multipliers for GLM models', () => {
const models = ['glm-4.6', 'glm-4.5v', 'glm-4.5-air', 'glm-4.5', 'glm-4-32b', 'glm-4', 'glm4'];
models.forEach((key) => {
const expectedPrompt = tokenValues[key].prompt;
const expectedCompletion = tokenValues[key].completion;
expect(getMultiplier({ valueKey: key, tokenType: 'prompt' })).toBe(expectedPrompt);
expect(getMultiplier({ valueKey: key, tokenType: 'completion' })).toBe(expectedCompletion);
expect(getMultiplier({ model: key, tokenType: 'prompt' })).toBe(expectedPrompt);
expect(getMultiplier({ model: key, tokenType: 'completion' })).toBe(expectedCompletion);
});
});
feat: Accurate Token Usage Tracking & Optional Balance (#1018) * refactor(Chains/llms): allow passing callbacks * refactor(BaseClient): accurately count completion tokens as generation only * refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM * wip: summary prompt tokens * refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end wip: draft out relevant providers and variables for token tracing * refactor(createLLM): make streaming prop false by default * chore: remove use of getTokenCountForResponse * refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace * chore: remove passing of streaming prop, also console log useful vars for tracing * feat: formatFromLangChain helper function to count tokens for ChatModelStart * refactor(initializeLLM): add role for LLM tracing * chore(formatFromLangChain): update JSDoc * feat(formatMessages): formats langChain messages into OpenAI payload format * chore: install openai-chat-tokens * refactor(formatMessage): optimize conditional langChain logic fix(formatFromLangChain): fix destructuring * feat: accurate prompt tokens for ChatModelStart before generation * refactor(handleChatModelStart): move to callbacks dir, use factory function * refactor(initializeLLM): rename 'role' to 'context' * feat(Balance/Transaction): new schema/models for tracking token spend refactor(Key): factor out model export to separate file * refactor(initializeClient): add req,res objects to client options * feat: add-balance script to add to an existing users' token balance refactor(Transaction): use multiplier map/function, return balance update * refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match * refactor(Tx): add fair fallback value multiplier incase the config result is undefined * refactor(Balance): rename 'tokens' to 'tokenCredits' * feat: balance check, add tx.js for new tx-related methods and tests * chore(summaryPrompts): update prompt token count * refactor(callbacks): pass req, res wip: check balance * refactor(Tx): make convoId a String type, fix(calculateTokenValue) * refactor(BaseClient): add conversationId as client prop when assigned * feat(RunManager): track LLM runs with manager, track token spend from LLM, refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls * feat(spendTokens): helper to spend prompt/completion tokens * feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds refactor(Balance): static check method to return object instead of boolean now wip(OpenAIClient): implement use of checkBalance * refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large refactor(OpenAIClient): add checkBalance refactor(createStartHandler): add checkBalance * chore: remove prompt and completion token logging from route handler * chore(spendTokens): add JSDoc * feat(logTokenCost): record transactions for basic api calls * chore(ask/edit): invoke getResponseSender only once per API call * refactor(ask/edit): pass promptTokens to getIds and include in abort data * refactor(getIds -> getReqData): rename function * refactor(Tx): increase value if incomplete message * feat: record tokenUsage when message is aborted * refactor: subtract tokens when payload includes function_call * refactor: add namespace for token_balance * fix(spendTokens): only execute if corresponding token type amounts are defined * refactor(checkBalance): throws Error if not enough token credits * refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run' * fix(abortMiddleware): circular dependency, and default to empty string for completionTokens * fix: properly cancel title requests when there isn't enough tokens to generate * feat(predictNewSummary): custom chain for summaries to allow signal passing refactor(summaryBuffer): use new custom chain * feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError * refactor(createStartHandler): if summary, add error details to runs * fix(OpenAIClient): support aborting from summarization & showing error to user refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer * refactor(logTokenCost -> recordTokenUsage): rename * refactor(checkBalance): include promptTokens in errorMessage * refactor(checkBalance/spendTokens): move to models dir * fix(createLanguageChain): correctly pass config * refactor(initializeLLM/title): add tokenBuffer of 150 for balance check * refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called * refactor(createStartHandler): add error to run if context is plugins as well * refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run * refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic * chore: use absolute equality for addTitle condition * refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional * style: icon changes to match official * fix(BaseClient): getTokenCountForResponse -> getTokenCount * fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc * refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled * fix(e2e/cleanUp): cleanup new collections, import all model methods from index * fix(config/add-balance): add uncaughtException listener * fix: circular dependency * refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance * fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped * fix(createStartHandler): correct condition for generations * chore: bump postcss due to moderate severity vulnerability * chore: bump zod due to low severity vulnerability * chore: bump openai & data-provider version * feat(types): OpenAI Message types * chore: update bun lockfile * refactor(CodeBlock): add error block formatting * refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON * chore(logViolation): delete user_id after error is logged * refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex * fix(DALL-E): use latest openai SDK * chore: reorganize imports, fix type issue * feat(server): add balance route * fix(api/models): add auth * feat(data-provider): /api/balance query * feat: show balance if checking is enabled, refetch on final message or error * chore: update docs, .env.example with token_usage info, add balance script command * fix(Balance): fallback to empty obj for balance query * style: slight adjustment of balance element * docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
});
describe('AWS Bedrock Model Tests', () => {
const awsModels = [
'anthropic.claude-3-5-haiku-20241022-v1:0',
'anthropic.claude-3-haiku-20240307-v1:0',
'anthropic.claude-3-sonnet-20240229-v1:0',
'anthropic.claude-3-opus-20240229-v1:0',
'anthropic.claude-3-5-sonnet-20240620-v1:0',
'anthropic.claude-v2:1',
'anthropic.claude-instant-v1',
'meta.llama2-13b-chat-v1',
'meta.llama2-70b-chat-v1',
'meta.llama3-8b-instruct-v1:0',
'meta.llama3-70b-instruct-v1:0',
'meta.llama3-1-8b-instruct-v1:0',
'meta.llama3-1-70b-instruct-v1:0',
'meta.llama3-1-405b-instruct-v1:0',
'mistral.mistral-7b-instruct-v0:2',
'mistral.mistral-small-2402-v1:0',
'mistral.mixtral-8x7b-instruct-v0:1',
'mistral.mistral-large-2402-v1:0',
'mistral.mistral-large-2407-v1:0',
'cohere.command-text-v14',
'cohere.command-light-text-v14',
'cohere.command-r-v1:0',
'cohere.command-r-plus-v1:0',
'ai21.j2-mid-v1',
'ai21.j2-ultra-v1',
'amazon.titan-text-lite-v1',
'amazon.titan-text-express-v1',
'amazon.nova-micro-v1:0',
'amazon.nova-lite-v1:0',
'amazon.nova-pro-v1:0',
];
it('should return the correct prompt multipliers for all models', () => {
const results = awsModels.map((model) => {
🪨 feat: AWS Bedrock support (#3935) * feat: Add BedrockIcon component to SVG library * feat: EModelEndpoint.bedrock * feat: first pass, bedrock chat. note: AgentClient is returning `agents` as conversation.endpoint * fix: declare endpoint in initialization step * chore: Update @librechat/agents dependency to version 1.4.5 * feat: backend content aggregation for agents/bedrock * feat: abort agent requests * feat: AWS Bedrock icons * WIP: agent provider schema parsing * chore: Update EditIcon props type * refactor(useGenerationsByLatest): make agents and bedrock editable * refactor: non-assistant message content, parts * fix: Bedrock response `sender` * fix: use endpointOption.model_parameters not endpointOption.modelOptions * fix: types for step handler * refactor: Update Agents.ToolCallDelta type * refactor: Remove unnecessary assignment of parentMessageId in AskController * refactor: remove unnecessary assignment of parentMessageId (agent request handler) * fix(bedrock/agents): message regeneration * refactor: dynamic form elements using react-hook-form Controllers * fix: agent icons/labels for messages * fix: agent actions * fix: use of new dynamic tags causing application crash * refactor: dynamic settings touch-ups * refactor: update Slider component to allow custom track class name * refactor: update DynamicSlider component styles * refactor: use Constants value for GLOBAL_PROJECT_NAME (enum) * feat: agent share global methods/controllers * fix: agents query * fix: `getResponseModel` * fix: share prompt a11y issue * refactor: update SharePrompt dialog theme styles * refactor: explicit typing for SharePrompt * feat: add agent roles/permissions * chore: update @librechat/agents dependency to version 1.4.7 for tool_call_ids edge case * fix(Anthropic): messages.X.content.Y.tool_use.input: Input should be a valid dictionary * fix: handle text parts with tool_call_ids and empty text * fix: role initialization * refactor: don't make instructions required * refactor: improve typing of Text part * fix: setShowStopButton for agents route * chore: remove params for now * fix: add streamBuffer and streamRate to help prevent 'Overloaded' errors from Anthropic API * refactor: remove console.log statement in ContentRender component * chore: typing, rename Context to Delete Button * chore(DeleteButton): logging * refactor(Action): make accessible * style(Action): improve a11y again * refactor: remove use/mention of mongoose sessions * feat: first pass, sharing agents * feat: visual indicator for global agent, remove author when serving to non-author * wip: params * chore: fix typing issues * fix(schemas): typing * refactor: improve accessibility of ListCard component and fix console React warning * wip: reset templates for non-legacy new convos * Revert "wip: params" This reverts commit f8067e91d4adf7be9e0d9e914aaae79ac4689b80. * Revert "refactor: dynamic form elements using react-hook-form Controllers" This reverts commit 2150c4815d8c74a978a4b697aa8f54dc11e035d7. * fix(Parameters): types and parameter effect update to only update local state to parameters * refactor: optimize useDebouncedInput hook for better performance * feat: first pass, anthropic bedrock params * chore: paramEndpoints check for endpointType too * fix: maxTokens to use coerceNumber.optional(), * feat: extra chat model params * chore: reduce code repetition * refactor: improve preset title handling in SaveAsPresetDialog component * refactor: improve preset handling in HeaderOptions component * chore: improve typing, replace legacy dialog for SaveAsPresetDialog * feat: save as preset from parameters panel * fix: multi-search in select dropdown when using Option type * refactor: update default showDefault value to false in Dynamic components * feat: Bedrock presets settings * chore: config, fix agents schema, update config version * refactor: update AWS region variable name in bedrock options endpoint to BEDROCK_AWS_DEFAULT_REGION * refactor: update baseEndpointSchema in config.ts to include baseURL property * refactor: update createRun function to include req parameter and set streamRate based on provider * feat: availableRegions via config * refactor: remove unused demo agent controller file * WIP: title * Update @librechat/agents to version 1.5.0 * chore: addTitle.js to handle empty responseText * feat: support images and titles * feat: context token updates * Refactor BaseClient test to use expect.objectContaining * refactor: add model select, remove header options params, move side panel params below prompts * chore: update models list, catch title error * feat: model service for bedrock models (env) * chore: Remove verbose debug log in AgentClient class following stream * feat(bedrock): track token spend; fix: token rates, value key mapping for AWS models * refactor: handle streamRate in `handleLLMNewToken` callback * chore: AWS Bedrock example config in `.env.example` * refactor: Rename bedrockMeta to bedrockGeneral in settings.ts and use for AI21 and Amazon Bedrock providers * refactor: Update `.env.example` with AWS Bedrock model IDs URL and additional notes * feat: titleModel support for bedrock * refactor: Update `.env.example` with additional notes for AWS Bedrock model IDs
2024-09-09 12:06:59 -04:00
const valueKey = getValueKey(model, EModelEndpoint.bedrock);
const multiplier = getMultiplier({ valueKey, tokenType: 'prompt' });
return tokenValues[valueKey].prompt && multiplier === tokenValues[valueKey].prompt;
});
expect(results.every(Boolean)).toBe(true);
});
it('should return the correct completion multipliers for all models', () => {
const results = awsModels.map((model) => {
🪨 feat: AWS Bedrock support (#3935) * feat: Add BedrockIcon component to SVG library * feat: EModelEndpoint.bedrock * feat: first pass, bedrock chat. note: AgentClient is returning `agents` as conversation.endpoint * fix: declare endpoint in initialization step * chore: Update @librechat/agents dependency to version 1.4.5 * feat: backend content aggregation for agents/bedrock * feat: abort agent requests * feat: AWS Bedrock icons * WIP: agent provider schema parsing * chore: Update EditIcon props type * refactor(useGenerationsByLatest): make agents and bedrock editable * refactor: non-assistant message content, parts * fix: Bedrock response `sender` * fix: use endpointOption.model_parameters not endpointOption.modelOptions * fix: types for step handler * refactor: Update Agents.ToolCallDelta type * refactor: Remove unnecessary assignment of parentMessageId in AskController * refactor: remove unnecessary assignment of parentMessageId (agent request handler) * fix(bedrock/agents): message regeneration * refactor: dynamic form elements using react-hook-form Controllers * fix: agent icons/labels for messages * fix: agent actions * fix: use of new dynamic tags causing application crash * refactor: dynamic settings touch-ups * refactor: update Slider component to allow custom track class name * refactor: update DynamicSlider component styles * refactor: use Constants value for GLOBAL_PROJECT_NAME (enum) * feat: agent share global methods/controllers * fix: agents query * fix: `getResponseModel` * fix: share prompt a11y issue * refactor: update SharePrompt dialog theme styles * refactor: explicit typing for SharePrompt * feat: add agent roles/permissions * chore: update @librechat/agents dependency to version 1.4.7 for tool_call_ids edge case * fix(Anthropic): messages.X.content.Y.tool_use.input: Input should be a valid dictionary * fix: handle text parts with tool_call_ids and empty text * fix: role initialization * refactor: don't make instructions required * refactor: improve typing of Text part * fix: setShowStopButton for agents route * chore: remove params for now * fix: add streamBuffer and streamRate to help prevent 'Overloaded' errors from Anthropic API * refactor: remove console.log statement in ContentRender component * chore: typing, rename Context to Delete Button * chore(DeleteButton): logging * refactor(Action): make accessible * style(Action): improve a11y again * refactor: remove use/mention of mongoose sessions * feat: first pass, sharing agents * feat: visual indicator for global agent, remove author when serving to non-author * wip: params * chore: fix typing issues * fix(schemas): typing * refactor: improve accessibility of ListCard component and fix console React warning * wip: reset templates for non-legacy new convos * Revert "wip: params" This reverts commit f8067e91d4adf7be9e0d9e914aaae79ac4689b80. * Revert "refactor: dynamic form elements using react-hook-form Controllers" This reverts commit 2150c4815d8c74a978a4b697aa8f54dc11e035d7. * fix(Parameters): types and parameter effect update to only update local state to parameters * refactor: optimize useDebouncedInput hook for better performance * feat: first pass, anthropic bedrock params * chore: paramEndpoints check for endpointType too * fix: maxTokens to use coerceNumber.optional(), * feat: extra chat model params * chore: reduce code repetition * refactor: improve preset title handling in SaveAsPresetDialog component * refactor: improve preset handling in HeaderOptions component * chore: improve typing, replace legacy dialog for SaveAsPresetDialog * feat: save as preset from parameters panel * fix: multi-search in select dropdown when using Option type * refactor: update default showDefault value to false in Dynamic components * feat: Bedrock presets settings * chore: config, fix agents schema, update config version * refactor: update AWS region variable name in bedrock options endpoint to BEDROCK_AWS_DEFAULT_REGION * refactor: update baseEndpointSchema in config.ts to include baseURL property * refactor: update createRun function to include req parameter and set streamRate based on provider * feat: availableRegions via config * refactor: remove unused demo agent controller file * WIP: title * Update @librechat/agents to version 1.5.0 * chore: addTitle.js to handle empty responseText * feat: support images and titles * feat: context token updates * Refactor BaseClient test to use expect.objectContaining * refactor: add model select, remove header options params, move side panel params below prompts * chore: update models list, catch title error * feat: model service for bedrock models (env) * chore: Remove verbose debug log in AgentClient class following stream * feat(bedrock): track token spend; fix: token rates, value key mapping for AWS models * refactor: handle streamRate in `handleLLMNewToken` callback * chore: AWS Bedrock example config in `.env.example` * refactor: Rename bedrockMeta to bedrockGeneral in settings.ts and use for AI21 and Amazon Bedrock providers * refactor: Update `.env.example` with AWS Bedrock model IDs URL and additional notes * feat: titleModel support for bedrock * refactor: Update `.env.example` with additional notes for AWS Bedrock model IDs
2024-09-09 12:06:59 -04:00
const valueKey = getValueKey(model, EModelEndpoint.bedrock);
const multiplier = getMultiplier({ valueKey, tokenType: 'completion' });
return tokenValues[valueKey].completion && multiplier === tokenValues[valueKey].completion;
});
expect(results.every(Boolean)).toBe(true);
});
});
🧮 feat: Enhance Model Pricing Coverage and Pattern Matching (#10173) * updated gpt5-pro it is here and on openrouter https://platform.openai.com/docs/models/gpt-5-pro * feat: Add gpt-5-pro pricing - Implemented handling for the new gpt-5-pro model in the getValueKey function. - Updated tests to ensure correct behavior for gpt-5-pro across various scenarios. - Adjusted token limits and multipliers for gpt-5-pro in the tokens utility files. - Enhanced model matching functionality to include gpt-5-pro variations. * refactor: optimize model pricing and validation logic - Added new model pricing entries for llama2, llama3, and qwen variants in tx.js. - Updated tokenValues to include additional models and their pricing structures. - Implemented validation tests in tx.spec.js to ensure all models resolve correctly to pricing. - Refactored getValueKey function to improve model matching and resolution efficiency. - Removed outdated model entries from tokens.ts to streamline pricing management. * fix: add missing pricing * chore: update model pricing for qwen and gemma variants * chore: update model pricing and add validation for context windows - Removed outdated model entries from tx.js and updated tokenValues with new models. - Added a test in tx.spec.js to ensure all models with pricing have corresponding context windows defined in tokens.ts. - Introduced 'command-text' model pricing in tokens.ts to maintain consistency across model definitions. * chore: update model names and pricing for AI21 and Amazon models - Refactored model names in tx.js for AI21 and Amazon models to remove versioning and improve consistency. - Updated pricing values in tokens.ts to reflect the new model names. - Added comprehensive tests in tx.spec.js to validate pricing for both short and full model names across AI21 and Amazon models. * feat: add pricing and validation for Claude Haiku 4.5 model * chore: increase default max context tokens to 18000 for agents * feat: add Qwen3 model pricing and validation tests * chore: reorganize and update Qwen model pricing in tx.js and tokens.ts --------- Co-authored-by: khfung <68192841+khfung@users.noreply.github.com>
2025-10-19 09:23:27 -04:00
describe('Amazon Model Tests', () => {
describe('Amazon Nova Models', () => {
it('should return correct pricing for nova-premier', () => {
expect(getMultiplier({ model: 'nova-premier', tokenType: 'prompt' })).toBe(
tokenValues['nova-premier'].prompt,
);
expect(getMultiplier({ model: 'nova-premier', tokenType: 'completion' })).toBe(
tokenValues['nova-premier'].completion,
);
expect(getMultiplier({ model: 'amazon.nova-premier-v1:0', tokenType: 'prompt' })).toBe(
tokenValues['nova-premier'].prompt,
);
expect(getMultiplier({ model: 'amazon.nova-premier-v1:0', tokenType: 'completion' })).toBe(
tokenValues['nova-premier'].completion,
);
});
it('should return correct pricing for nova-pro', () => {
expect(getMultiplier({ model: 'nova-pro', tokenType: 'prompt' })).toBe(
tokenValues['nova-pro'].prompt,
);
expect(getMultiplier({ model: 'nova-pro', tokenType: 'completion' })).toBe(
tokenValues['nova-pro'].completion,
);
expect(getMultiplier({ model: 'amazon.nova-pro-v1:0', tokenType: 'prompt' })).toBe(
tokenValues['nova-pro'].prompt,
);
expect(getMultiplier({ model: 'amazon.nova-pro-v1:0', tokenType: 'completion' })).toBe(
tokenValues['nova-pro'].completion,
);
});
it('should return correct pricing for nova-lite', () => {
expect(getMultiplier({ model: 'nova-lite', tokenType: 'prompt' })).toBe(
tokenValues['nova-lite'].prompt,
);
expect(getMultiplier({ model: 'nova-lite', tokenType: 'completion' })).toBe(
tokenValues['nova-lite'].completion,
);
expect(getMultiplier({ model: 'amazon.nova-lite-v1:0', tokenType: 'prompt' })).toBe(
tokenValues['nova-lite'].prompt,
);
expect(getMultiplier({ model: 'amazon.nova-lite-v1:0', tokenType: 'completion' })).toBe(
tokenValues['nova-lite'].completion,
);
});
it('should return correct pricing for nova-micro', () => {
expect(getMultiplier({ model: 'nova-micro', tokenType: 'prompt' })).toBe(
tokenValues['nova-micro'].prompt,
);
expect(getMultiplier({ model: 'nova-micro', tokenType: 'completion' })).toBe(
tokenValues['nova-micro'].completion,
);
expect(getMultiplier({ model: 'amazon.nova-micro-v1:0', tokenType: 'prompt' })).toBe(
tokenValues['nova-micro'].prompt,
);
expect(getMultiplier({ model: 'amazon.nova-micro-v1:0', tokenType: 'completion' })).toBe(
tokenValues['nova-micro'].completion,
);
});
it('should match both short and full model names to the same pricing', () => {
const models = ['nova-micro', 'nova-lite', 'nova-pro', 'nova-premier'];
const fullModels = [
'amazon.nova-micro-v1:0',
'amazon.nova-lite-v1:0',
'amazon.nova-pro-v1:0',
'amazon.nova-premier-v1:0',
];
models.forEach((shortModel, i) => {
const fullModel = fullModels[i];
const shortPrompt = getMultiplier({ model: shortModel, tokenType: 'prompt' });
const fullPrompt = getMultiplier({ model: fullModel, tokenType: 'prompt' });
const shortCompletion = getMultiplier({ model: shortModel, tokenType: 'completion' });
const fullCompletion = getMultiplier({ model: fullModel, tokenType: 'completion' });
expect(shortPrompt).toBe(fullPrompt);
expect(shortCompletion).toBe(fullCompletion);
expect(shortPrompt).toBe(tokenValues[shortModel].prompt);
expect(shortCompletion).toBe(tokenValues[shortModel].completion);
});
});
});
describe('Amazon Titan Models', () => {
it('should return correct pricing for titan-text-premier', () => {
expect(getMultiplier({ model: 'titan-text-premier', tokenType: 'prompt' })).toBe(
tokenValues['titan-text-premier'].prompt,
);
expect(getMultiplier({ model: 'titan-text-premier', tokenType: 'completion' })).toBe(
tokenValues['titan-text-premier'].completion,
);
expect(getMultiplier({ model: 'amazon.titan-text-premier-v1:0', tokenType: 'prompt' })).toBe(
tokenValues['titan-text-premier'].prompt,
);
expect(
getMultiplier({ model: 'amazon.titan-text-premier-v1:0', tokenType: 'completion' }),
).toBe(tokenValues['titan-text-premier'].completion);
});
it('should return correct pricing for titan-text-express', () => {
expect(getMultiplier({ model: 'titan-text-express', tokenType: 'prompt' })).toBe(
tokenValues['titan-text-express'].prompt,
);
expect(getMultiplier({ model: 'titan-text-express', tokenType: 'completion' })).toBe(
tokenValues['titan-text-express'].completion,
);
expect(getMultiplier({ model: 'amazon.titan-text-express-v1', tokenType: 'prompt' })).toBe(
tokenValues['titan-text-express'].prompt,
);
expect(
getMultiplier({ model: 'amazon.titan-text-express-v1', tokenType: 'completion' }),
).toBe(tokenValues['titan-text-express'].completion);
});
it('should return correct pricing for titan-text-lite', () => {
expect(getMultiplier({ model: 'titan-text-lite', tokenType: 'prompt' })).toBe(
tokenValues['titan-text-lite'].prompt,
);
expect(getMultiplier({ model: 'titan-text-lite', tokenType: 'completion' })).toBe(
tokenValues['titan-text-lite'].completion,
);
expect(getMultiplier({ model: 'amazon.titan-text-lite-v1', tokenType: 'prompt' })).toBe(
tokenValues['titan-text-lite'].prompt,
);
expect(getMultiplier({ model: 'amazon.titan-text-lite-v1', tokenType: 'completion' })).toBe(
tokenValues['titan-text-lite'].completion,
);
});
it('should match both short and full model names to the same pricing', () => {
const models = ['titan-text-lite', 'titan-text-express', 'titan-text-premier'];
const fullModels = [
'amazon.titan-text-lite-v1',
'amazon.titan-text-express-v1',
'amazon.titan-text-premier-v1:0',
];
models.forEach((shortModel, i) => {
const fullModel = fullModels[i];
const shortPrompt = getMultiplier({ model: shortModel, tokenType: 'prompt' });
const fullPrompt = getMultiplier({ model: fullModel, tokenType: 'prompt' });
const shortCompletion = getMultiplier({ model: shortModel, tokenType: 'completion' });
const fullCompletion = getMultiplier({ model: fullModel, tokenType: 'completion' });
expect(shortPrompt).toBe(fullPrompt);
expect(shortCompletion).toBe(fullCompletion);
expect(shortPrompt).toBe(tokenValues[shortModel].prompt);
expect(shortCompletion).toBe(tokenValues[shortModel].completion);
});
});
});
});
describe('AI21 Model Tests', () => {
describe('AI21 J2 Models', () => {
it('should return correct pricing for j2-mid', () => {
expect(getMultiplier({ model: 'j2-mid', tokenType: 'prompt' })).toBe(
tokenValues['j2-mid'].prompt,
);
expect(getMultiplier({ model: 'j2-mid', tokenType: 'completion' })).toBe(
tokenValues['j2-mid'].completion,
);
expect(getMultiplier({ model: 'ai21.j2-mid-v1', tokenType: 'prompt' })).toBe(
tokenValues['j2-mid'].prompt,
);
expect(getMultiplier({ model: 'ai21.j2-mid-v1', tokenType: 'completion' })).toBe(
tokenValues['j2-mid'].completion,
);
});
it('should return correct pricing for j2-ultra', () => {
expect(getMultiplier({ model: 'j2-ultra', tokenType: 'prompt' })).toBe(
tokenValues['j2-ultra'].prompt,
);
expect(getMultiplier({ model: 'j2-ultra', tokenType: 'completion' })).toBe(
tokenValues['j2-ultra'].completion,
);
expect(getMultiplier({ model: 'ai21.j2-ultra-v1', tokenType: 'prompt' })).toBe(
tokenValues['j2-ultra'].prompt,
);
expect(getMultiplier({ model: 'ai21.j2-ultra-v1', tokenType: 'completion' })).toBe(
tokenValues['j2-ultra'].completion,
);
});
it('should match both short and full model names to the same pricing', () => {
const models = ['j2-mid', 'j2-ultra'];
const fullModels = ['ai21.j2-mid-v1', 'ai21.j2-ultra-v1'];
models.forEach((shortModel, i) => {
const fullModel = fullModels[i];
const shortPrompt = getMultiplier({ model: shortModel, tokenType: 'prompt' });
const fullPrompt = getMultiplier({ model: fullModel, tokenType: 'prompt' });
const shortCompletion = getMultiplier({ model: shortModel, tokenType: 'completion' });
const fullCompletion = getMultiplier({ model: fullModel, tokenType: 'completion' });
expect(shortPrompt).toBe(fullPrompt);
expect(shortCompletion).toBe(fullCompletion);
expect(shortPrompt).toBe(tokenValues[shortModel].prompt);
expect(shortCompletion).toBe(tokenValues[shortModel].completion);
});
});
});
describe('AI21 Jamba Models', () => {
it('should return correct pricing for jamba-instruct', () => {
expect(getMultiplier({ model: 'jamba-instruct', tokenType: 'prompt' })).toBe(
tokenValues['jamba-instruct'].prompt,
);
expect(getMultiplier({ model: 'jamba-instruct', tokenType: 'completion' })).toBe(
tokenValues['jamba-instruct'].completion,
);
expect(getMultiplier({ model: 'ai21.jamba-instruct-v1:0', tokenType: 'prompt' })).toBe(
tokenValues['jamba-instruct'].prompt,
);
expect(getMultiplier({ model: 'ai21.jamba-instruct-v1:0', tokenType: 'completion' })).toBe(
tokenValues['jamba-instruct'].completion,
);
});
it('should match both short and full model names to the same pricing', () => {
const shortPrompt = getMultiplier({ model: 'jamba-instruct', tokenType: 'prompt' });
const fullPrompt = getMultiplier({
model: 'ai21.jamba-instruct-v1:0',
tokenType: 'prompt',
});
const shortCompletion = getMultiplier({ model: 'jamba-instruct', tokenType: 'completion' });
const fullCompletion = getMultiplier({
model: 'ai21.jamba-instruct-v1:0',
tokenType: 'completion',
});
expect(shortPrompt).toBe(fullPrompt);
expect(shortCompletion).toBe(fullCompletion);
expect(shortPrompt).toBe(tokenValues['jamba-instruct'].prompt);
expect(shortCompletion).toBe(tokenValues['jamba-instruct'].completion);
});
});
});
describe('Deepseek Model Tests', () => {
🔗 feat: Agent Chain (Mixture-of-Agents) (#6374) * wip: first pass, dropdown for selecting sequential agents * refactor: Improve agent selection logic and enhance performance in SequentialAgents component * wip: seq. agents working ideas * wip: sequential agents style change * refactor: move agent form options/submission outside of AgentConfig * refactor: prevent repeating code * refactor: simplify current agent display in SequentialAgents component * feat: persist form value handling in AgentSelect component for agent_ids * feat: first pass, sequential agnets agent update * feat: enhance message display with agent updates and empty text handling * chore: update Icon component to use EModelEndpoint for agent endpoints * feat: update content type checks in BaseClient to use constants for better readability * feat: adjust max context tokens calculation to use 90% of the model's max tokens * feat: first pass, agent run message pruning * chore: increase max listeners for abort controller to prevent memory leaks * feat: enhance runAgent function to include current index count map for improved token tracking * chore: update @librechat/agents dependency to version 2.2.5 * feat: update icons and style of SequentialAgents component for improved UI consistency * feat: add AdvancedButton and AdvancedPanel components for enhanced agent settings navigation, update styling for agent form * chore: adjust minimum height of AdvancedPanel component for better layout consistency * chore: update @librechat/agents dependency to version 2.2.6 * feat: enhance message formatting by incorporating tool set into agent message processing, in order to allow better mix/matching of agents (as tool calls for tools not found in set will be stringified) * refactor: reorder components in AgentConfig for improved readability and maintainability * refactor: enhance layout of AgentUpdate component for improved visual structure * feat: add DeepSeek provider to Bedrock settings and schemas * feat: enhance link styling in mobile.css for better visibility and accessibility * fix: update banner model import in update banner script; export Banner model * refactor: `duplicateAgentHandler` to include tool_resources only for OCR context files * feat: add 'qwen-vl' to visionModels for enhanced model support * fix: change image format from JPEG to PNG in DALLE3 response * feat: reorganize Advanced components and add localizations * refactor: simplify JSX structure in AgentChain component to defer container styling to parent * feat: add FormInput component for reusable input handling * feat: make agent recursion limit configurable from builder * feat: add support for agent capabilities chain in AdvancedPanel and update data-provider version * feat: add maxRecursionLimit configuration for agents and update related documentation * fix: update CONFIG_VERSION to 1.2.3 in data provider configuration * feat: replace recursion limit input with MaxAgentSteps component and enhance input handling * feat: enhance AgentChain component with hover card for additional information and update related labels * fix: pass request and response objects to `createActionTool` when using assistant actions to prevent auth error * feat: update AgentChain component layout to include agent count display * feat: increase default max listeners and implement capability check function for agent chain * fix: update link styles in mobile.css for better visibility in dark mode * chore: temp. remove agents package while bumping shared packages * chore: update @langchain/google-genai package to version 0.1.11 * chore: update @langchain/google-vertexai package to version 0.2.2 * chore: add @librechat/agents package at version 2.2.8 * feat: add deepseek.r1 model with token rate and context values for bedrock
2025-03-17 16:43:44 -04:00
const deepseekModels = ['deepseek-chat', 'deepseek-coder', 'deepseek-reasoner', 'deepseek.r1'];
it('should return the correct prompt multipliers for all models', () => {
const results = deepseekModels.map((model) => {
const valueKey = getValueKey(model);
const multiplier = getMultiplier({ valueKey, tokenType: 'prompt' });
return tokenValues[valueKey].prompt && multiplier === tokenValues[valueKey].prompt;
});
expect(results.every(Boolean)).toBe(true);
});
it('should return the correct completion multipliers for all models', () => {
const results = deepseekModels.map((model) => {
const valueKey = getValueKey(model);
const multiplier = getMultiplier({ valueKey, tokenType: 'completion' });
return tokenValues[valueKey].completion && multiplier === tokenValues[valueKey].completion;
});
expect(results.every(Boolean)).toBe(true);
});
it('should return the correct prompt multipliers for reasoning model', () => {
const model = 'deepseek-reasoner';
const valueKey = getValueKey(model);
expect(valueKey).toBe(model);
const multiplier = getMultiplier({ valueKey, tokenType: 'prompt' });
const result = tokenValues[valueKey].prompt && multiplier === tokenValues[valueKey].prompt;
expect(result).toBe(true);
});
it('should return correct pricing for deepseek-chat', () => {
expect(getMultiplier({ model: 'deepseek-chat', tokenType: 'prompt' })).toBe(
tokenValues['deepseek-chat'].prompt,
);
expect(getMultiplier({ model: 'deepseek-chat', tokenType: 'completion' })).toBe(
tokenValues['deepseek-chat'].completion,
);
expect(tokenValues['deepseek-chat'].prompt).toBe(0.28);
expect(tokenValues['deepseek-chat'].completion).toBe(0.42);
});
it('should return correct pricing for deepseek-reasoner', () => {
expect(getMultiplier({ model: 'deepseek-reasoner', tokenType: 'prompt' })).toBe(
tokenValues['deepseek-reasoner'].prompt,
);
expect(getMultiplier({ model: 'deepseek-reasoner', tokenType: 'completion' })).toBe(
tokenValues['deepseek-reasoner'].completion,
);
expect(tokenValues['deepseek-reasoner'].prompt).toBe(0.28);
expect(tokenValues['deepseek-reasoner'].completion).toBe(0.42);
});
it('should handle DeepSeek model name variations with provider prefixes', () => {
const modelVariations = [
'deepseek/deepseek-chat',
'openrouter/deepseek-chat',
'deepseek/deepseek-reasoner',
];
modelVariations.forEach((model) => {
const promptMultiplier = getMultiplier({ model, tokenType: 'prompt' });
const completionMultiplier = getMultiplier({ model, tokenType: 'completion' });
expect(promptMultiplier).toBe(0.28);
expect(completionMultiplier).toBe(0.42);
});
});
it('should return correct cache multipliers for DeepSeek models', () => {
expect(getCacheMultiplier({ model: 'deepseek-chat', cacheType: 'write' })).toBe(
cacheTokenValues['deepseek-chat'].write,
);
expect(getCacheMultiplier({ model: 'deepseek-chat', cacheType: 'read' })).toBe(
cacheTokenValues['deepseek-chat'].read,
);
expect(getCacheMultiplier({ model: 'deepseek-reasoner', cacheType: 'write' })).toBe(
cacheTokenValues['deepseek-reasoner'].write,
);
expect(getCacheMultiplier({ model: 'deepseek-reasoner', cacheType: 'read' })).toBe(
cacheTokenValues['deepseek-reasoner'].read,
);
});
it('should return correct cache pricing values for DeepSeek models', () => {
expect(cacheTokenValues['deepseek-chat'].write).toBe(0.28);
expect(cacheTokenValues['deepseek-chat'].read).toBe(0.028);
expect(cacheTokenValues['deepseek-reasoner'].write).toBe(0.28);
expect(cacheTokenValues['deepseek-reasoner'].read).toBe(0.028);
expect(cacheTokenValues['deepseek'].write).toBe(0.28);
expect(cacheTokenValues['deepseek'].read).toBe(0.028);
});
it('should handle DeepSeek cache multipliers with model variations', () => {
const modelVariations = ['deepseek/deepseek-chat', 'openrouter/deepseek-reasoner'];
modelVariations.forEach((model) => {
const writeMultiplier = getCacheMultiplier({ model, cacheType: 'write' });
const readMultiplier = getCacheMultiplier({ model, cacheType: 'read' });
expect(writeMultiplier).toBe(0.28);
expect(readMultiplier).toBe(0.028);
});
});
});
🌙 feat: Moonshot Provider Support (#11621) * ✨ feat: Add Moonshot Provider Support - Updated the `isKnownCustomProvider` function to include `Providers.MOONSHOT` in the list of recognized custom providers. - Enhanced the `providerConfigMap` to initialize `MOONSHOT` with the custom initialization function. - Introduced `MoonshotIcon` component for visual representation in the UI, integrated into the `UnknownIcon` component. - Updated various files across the API and client to support the new `MOONSHOT` provider, including configuration and response handling. This update expands the capabilities of the application by integrating support for the Moonshot provider, enhancing both backend and frontend functionalities. * ✨ feat: Add Moonshot/Kimi Model Pricing and Tests - Introduced new pricing configurations for Moonshot and Kimi models in `tx.js`, including various model variations and their respective prompt and completion values. - Expanded unit tests in `tx.spec.js` and `tokens.spec.js` to validate pricing and token limits for the newly added Moonshot/Kimi models, ensuring accurate calculations and handling of model variations. - Updated utility functions to support the new model structures and ensure compatibility with existing functionalities. This update enhances the pricing model capabilities and improves test coverage for the Moonshot/Kimi integration. * ✨ feat: Enhance Token Pricing Documentation and Configuration - Added comprehensive documentation for token pricing configuration in `tx.js` and `tokens.ts`, emphasizing the importance of key ordering for pattern matching. - Clarified the process for defining base and specific patterns to ensure accurate pricing retrieval based on model names. - Improved code comments to guide future additions of model families, enhancing maintainability and understanding of the pricing structure. This update improves the clarity and usability of the token pricing configuration, facilitating better integration and future enhancements. * chore: import order * chore: linting
2026-02-04 10:53:57 +01:00
describe('Moonshot/Kimi Model Tests - Pricing', () => {
describe('Kimi Models', () => {
it('should return correct pricing for kimi base pattern', () => {
expect(getMultiplier({ model: 'kimi', tokenType: 'prompt' })).toBe(
tokenValues['kimi'].prompt,
);
expect(getMultiplier({ model: 'kimi', tokenType: 'completion' })).toBe(
tokenValues['kimi'].completion,
);
});
it('should return correct pricing for kimi-k2.5', () => {
expect(getMultiplier({ model: 'kimi-k2.5', tokenType: 'prompt' })).toBe(
tokenValues['kimi-k2.5'].prompt,
);
expect(getMultiplier({ model: 'kimi-k2.5', tokenType: 'completion' })).toBe(
tokenValues['kimi-k2.5'].completion,
);
});
it('should return correct pricing for kimi-k2 series', () => {
expect(getMultiplier({ model: 'kimi-k2', tokenType: 'prompt' })).toBe(
tokenValues['kimi-k2'].prompt,
);
expect(getMultiplier({ model: 'kimi-k2', tokenType: 'completion' })).toBe(
tokenValues['kimi-k2'].completion,
);
});
it('should return correct pricing for kimi-k2-turbo (higher pricing)', () => {
expect(getMultiplier({ model: 'kimi-k2-turbo', tokenType: 'prompt' })).toBe(
tokenValues['kimi-k2-turbo'].prompt,
);
expect(getMultiplier({ model: 'kimi-k2-turbo', tokenType: 'completion' })).toBe(
tokenValues['kimi-k2-turbo'].completion,
);
});
it('should return correct pricing for kimi-k2-thinking models', () => {
expect(getMultiplier({ model: 'kimi-k2-thinking', tokenType: 'prompt' })).toBe(
tokenValues['kimi-k2-thinking'].prompt,
);
expect(getMultiplier({ model: 'kimi-k2-thinking', tokenType: 'completion' })).toBe(
tokenValues['kimi-k2-thinking'].completion,
);
expect(getMultiplier({ model: 'kimi-k2-thinking-turbo', tokenType: 'prompt' })).toBe(
tokenValues['kimi-k2-thinking-turbo'].prompt,
);
expect(getMultiplier({ model: 'kimi-k2-thinking-turbo', tokenType: 'completion' })).toBe(
tokenValues['kimi-k2-thinking-turbo'].completion,
);
});
it('should handle Kimi model variations with provider prefixes', () => {
const modelVariations = ['openrouter/kimi-k2', 'openrouter/kimi-k2.5', 'openrouter/kimi'];
modelVariations.forEach((model) => {
const promptMultiplier = getMultiplier({ model, tokenType: 'prompt' });
const completionMultiplier = getMultiplier({ model, tokenType: 'completion' });
expect(promptMultiplier).toBe(tokenValues['kimi'].prompt);
expect([tokenValues['kimi'].completion, tokenValues['kimi-k2.5'].completion]).toContain(
completionMultiplier,
);
});
});
});
describe('Moonshot Models', () => {
it('should return correct pricing for moonshot base pattern (128k pricing)', () => {
expect(getMultiplier({ model: 'moonshot', tokenType: 'prompt' })).toBe(
tokenValues['moonshot'].prompt,
);
expect(getMultiplier({ model: 'moonshot', tokenType: 'completion' })).toBe(
tokenValues['moonshot'].completion,
);
});
it('should return correct pricing for moonshot-v1-8k', () => {
expect(getMultiplier({ model: 'moonshot-v1-8k', tokenType: 'prompt' })).toBe(
tokenValues['moonshot-v1-8k'].prompt,
);
expect(getMultiplier({ model: 'moonshot-v1-8k', tokenType: 'completion' })).toBe(
tokenValues['moonshot-v1-8k'].completion,
);
});
it('should return correct pricing for moonshot-v1-32k', () => {
expect(getMultiplier({ model: 'moonshot-v1-32k', tokenType: 'prompt' })).toBe(
tokenValues['moonshot-v1-32k'].prompt,
);
expect(getMultiplier({ model: 'moonshot-v1-32k', tokenType: 'completion' })).toBe(
tokenValues['moonshot-v1-32k'].completion,
);
});
it('should return correct pricing for moonshot-v1-128k', () => {
expect(getMultiplier({ model: 'moonshot-v1-128k', tokenType: 'prompt' })).toBe(
tokenValues['moonshot-v1-128k'].prompt,
);
expect(getMultiplier({ model: 'moonshot-v1-128k', tokenType: 'completion' })).toBe(
tokenValues['moonshot-v1-128k'].completion,
);
});
it('should return correct pricing for moonshot-v1 vision models', () => {
expect(getMultiplier({ model: 'moonshot-v1-8k-vision', tokenType: 'prompt' })).toBe(
tokenValues['moonshot-v1-8k-vision'].prompt,
);
expect(getMultiplier({ model: 'moonshot-v1-8k-vision', tokenType: 'completion' })).toBe(
tokenValues['moonshot-v1-8k-vision'].completion,
);
expect(getMultiplier({ model: 'moonshot-v1-32k-vision', tokenType: 'prompt' })).toBe(
tokenValues['moonshot-v1-32k-vision'].prompt,
);
expect(getMultiplier({ model: 'moonshot-v1-32k-vision', tokenType: 'completion' })).toBe(
tokenValues['moonshot-v1-32k-vision'].completion,
);
expect(getMultiplier({ model: 'moonshot-v1-128k-vision', tokenType: 'prompt' })).toBe(
tokenValues['moonshot-v1-128k-vision'].prompt,
);
expect(getMultiplier({ model: 'moonshot-v1-128k-vision', tokenType: 'completion' })).toBe(
tokenValues['moonshot-v1-128k-vision'].completion,
);
});
});
describe('Kimi Cache Multipliers', () => {
it('should return correct cache multipliers for kimi-k2 models', () => {
expect(getCacheMultiplier({ model: 'kimi', cacheType: 'write' })).toBe(
cacheTokenValues['kimi'].write,
);
expect(getCacheMultiplier({ model: 'kimi', cacheType: 'read' })).toBe(
cacheTokenValues['kimi'].read,
);
});
it('should return correct cache multipliers for kimi-k2.5 (lower read price)', () => {
expect(getCacheMultiplier({ model: 'kimi-k2.5', cacheType: 'write' })).toBe(
cacheTokenValues['kimi-k2.5'].write,
);
expect(getCacheMultiplier({ model: 'kimi-k2.5', cacheType: 'read' })).toBe(
cacheTokenValues['kimi-k2.5'].read,
);
});
it('should return correct cache multipliers for kimi-k2-turbo', () => {
expect(getCacheMultiplier({ model: 'kimi-k2-turbo', cacheType: 'write' })).toBe(
cacheTokenValues['kimi-k2-turbo'].write,
);
expect(getCacheMultiplier({ model: 'kimi-k2-turbo', cacheType: 'read' })).toBe(
cacheTokenValues['kimi-k2-turbo'].read,
);
});
it('should handle Kimi cache multipliers with model variations', () => {
const modelVariations = ['openrouter/kimi-k2', 'openrouter/kimi'];
modelVariations.forEach((model) => {
const writeMultiplier = getCacheMultiplier({ model, cacheType: 'write' });
const readMultiplier = getCacheMultiplier({ model, cacheType: 'read' });
expect(writeMultiplier).toBe(cacheTokenValues['kimi'].write);
expect(readMultiplier).toBe(cacheTokenValues['kimi'].read);
});
});
});
describe('Bedrock Moonshot Models', () => {
it('should return correct pricing for Bedrock moonshot models', () => {
expect(getMultiplier({ model: 'moonshot.kimi', tokenType: 'prompt' })).toBe(
tokenValues['moonshot.kimi'].prompt,
);
expect(getMultiplier({ model: 'moonshot.kimi', tokenType: 'completion' })).toBe(
tokenValues['moonshot.kimi'].completion,
);
expect(getMultiplier({ model: 'moonshot.kimi-k2', tokenType: 'prompt' })).toBe(
tokenValues['moonshot.kimi-k2'].prompt,
);
expect(getMultiplier({ model: 'moonshot.kimi-k2.5', tokenType: 'prompt' })).toBe(
tokenValues['moonshot.kimi-k2.5'].prompt,
);
expect(getMultiplier({ model: 'moonshot.kimi-k2.5', tokenType: 'completion' })).toBe(
tokenValues['moonshot.kimi-k2.5'].completion,
);
});
});
});
🧮 feat: Enhance Model Pricing Coverage and Pattern Matching (#10173) * updated gpt5-pro it is here and on openrouter https://platform.openai.com/docs/models/gpt-5-pro * feat: Add gpt-5-pro pricing - Implemented handling for the new gpt-5-pro model in the getValueKey function. - Updated tests to ensure correct behavior for gpt-5-pro across various scenarios. - Adjusted token limits and multipliers for gpt-5-pro in the tokens utility files. - Enhanced model matching functionality to include gpt-5-pro variations. * refactor: optimize model pricing and validation logic - Added new model pricing entries for llama2, llama3, and qwen variants in tx.js. - Updated tokenValues to include additional models and their pricing structures. - Implemented validation tests in tx.spec.js to ensure all models resolve correctly to pricing. - Refactored getValueKey function to improve model matching and resolution efficiency. - Removed outdated model entries from tokens.ts to streamline pricing management. * fix: add missing pricing * chore: update model pricing for qwen and gemma variants * chore: update model pricing and add validation for context windows - Removed outdated model entries from tx.js and updated tokenValues with new models. - Added a test in tx.spec.js to ensure all models with pricing have corresponding context windows defined in tokens.ts. - Introduced 'command-text' model pricing in tokens.ts to maintain consistency across model definitions. * chore: update model names and pricing for AI21 and Amazon models - Refactored model names in tx.js for AI21 and Amazon models to remove versioning and improve consistency. - Updated pricing values in tokens.ts to reflect the new model names. - Added comprehensive tests in tx.spec.js to validate pricing for both short and full model names across AI21 and Amazon models. * feat: add pricing and validation for Claude Haiku 4.5 model * chore: increase default max context tokens to 18000 for agents * feat: add Qwen3 model pricing and validation tests * chore: reorganize and update Qwen model pricing in tx.js and tokens.ts --------- Co-authored-by: khfung <68192841+khfung@users.noreply.github.com>
2025-10-19 09:23:27 -04:00
describe('Qwen3 Model Tests', () => {
describe('Qwen3 Base Models', () => {
it('should return correct pricing for qwen3 base pattern', () => {
expect(getMultiplier({ model: 'qwen3', tokenType: 'prompt' })).toBe(
tokenValues['qwen3'].prompt,
);
expect(getMultiplier({ model: 'qwen3', tokenType: 'completion' })).toBe(
tokenValues['qwen3'].completion,
);
});
it('should return correct pricing for qwen3-4b (falls back to qwen3)', () => {
expect(getMultiplier({ model: 'qwen3-4b', tokenType: 'prompt' })).toBe(
tokenValues['qwen3'].prompt,
);
expect(getMultiplier({ model: 'qwen3-4b', tokenType: 'completion' })).toBe(
tokenValues['qwen3'].completion,
);
});
it('should return correct pricing for qwen3-8b', () => {
expect(getMultiplier({ model: 'qwen3-8b', tokenType: 'prompt' })).toBe(
tokenValues['qwen3-8b'].prompt,
);
expect(getMultiplier({ model: 'qwen3-8b', tokenType: 'completion' })).toBe(
tokenValues['qwen3-8b'].completion,
);
});
it('should return correct pricing for qwen3-14b', () => {
expect(getMultiplier({ model: 'qwen3-14b', tokenType: 'prompt' })).toBe(
tokenValues['qwen3-14b'].prompt,
);
expect(getMultiplier({ model: 'qwen3-14b', tokenType: 'completion' })).toBe(
tokenValues['qwen3-14b'].completion,
);
});
it('should return correct pricing for qwen3-235b-a22b', () => {
expect(getMultiplier({ model: 'qwen3-235b-a22b', tokenType: 'prompt' })).toBe(
tokenValues['qwen3-235b-a22b'].prompt,
);
expect(getMultiplier({ model: 'qwen3-235b-a22b', tokenType: 'completion' })).toBe(
tokenValues['qwen3-235b-a22b'].completion,
);
});
it('should handle model name variations with provider prefixes', () => {
const models = [
{ input: 'qwen3', expected: 'qwen3' },
{ input: 'qwen3-4b', expected: 'qwen3' },
{ input: 'qwen3-8b', expected: 'qwen3-8b' },
{ input: 'qwen3-32b', expected: 'qwen3-32b' },
];
models.forEach(({ input, expected }) => {
const withPrefix = `alibaba/${input}`;
expect(getMultiplier({ model: withPrefix, tokenType: 'prompt' })).toBe(
tokenValues[expected].prompt,
);
expect(getMultiplier({ model: withPrefix, tokenType: 'completion' })).toBe(
tokenValues[expected].completion,
);
});
});
});
describe('Qwen3 VL (Vision-Language) Models', () => {
it('should return correct pricing for qwen3-vl-8b-thinking', () => {
expect(getMultiplier({ model: 'qwen3-vl-8b-thinking', tokenType: 'prompt' })).toBe(
tokenValues['qwen3-vl-8b-thinking'].prompt,
);
expect(getMultiplier({ model: 'qwen3-vl-8b-thinking', tokenType: 'completion' })).toBe(
tokenValues['qwen3-vl-8b-thinking'].completion,
);
});
it('should return correct pricing for qwen3-vl-8b-instruct', () => {
expect(getMultiplier({ model: 'qwen3-vl-8b-instruct', tokenType: 'prompt' })).toBe(
tokenValues['qwen3-vl-8b-instruct'].prompt,
);
expect(getMultiplier({ model: 'qwen3-vl-8b-instruct', tokenType: 'completion' })).toBe(
tokenValues['qwen3-vl-8b-instruct'].completion,
);
});
it('should return correct pricing for qwen3-vl-30b-a3b', () => {
expect(getMultiplier({ model: 'qwen3-vl-30b-a3b', tokenType: 'prompt' })).toBe(
tokenValues['qwen3-vl-30b-a3b'].prompt,
);
expect(getMultiplier({ model: 'qwen3-vl-30b-a3b', tokenType: 'completion' })).toBe(
tokenValues['qwen3-vl-30b-a3b'].completion,
);
});
it('should return correct pricing for qwen3-vl-235b-a22b', () => {
expect(getMultiplier({ model: 'qwen3-vl-235b-a22b', tokenType: 'prompt' })).toBe(
tokenValues['qwen3-vl-235b-a22b'].prompt,
);
expect(getMultiplier({ model: 'qwen3-vl-235b-a22b', tokenType: 'completion' })).toBe(
tokenValues['qwen3-vl-235b-a22b'].completion,
);
});
});
describe('Qwen3 Specialized Models', () => {
it('should return correct pricing for qwen3-max', () => {
expect(getMultiplier({ model: 'qwen3-max', tokenType: 'prompt' })).toBe(
tokenValues['qwen3-max'].prompt,
);
expect(getMultiplier({ model: 'qwen3-max', tokenType: 'completion' })).toBe(
tokenValues['qwen3-max'].completion,
);
});
it('should return correct pricing for qwen3-coder', () => {
expect(getMultiplier({ model: 'qwen3-coder', tokenType: 'prompt' })).toBe(
tokenValues['qwen3-coder'].prompt,
);
expect(getMultiplier({ model: 'qwen3-coder', tokenType: 'completion' })).toBe(
tokenValues['qwen3-coder'].completion,
);
});
it('should return correct pricing for qwen3-coder-plus', () => {
expect(getMultiplier({ model: 'qwen3-coder-plus', tokenType: 'prompt' })).toBe(
tokenValues['qwen3-coder-plus'].prompt,
);
expect(getMultiplier({ model: 'qwen3-coder-plus', tokenType: 'completion' })).toBe(
tokenValues['qwen3-coder-plus'].completion,
);
});
it('should return correct pricing for qwen3-coder-flash', () => {
expect(getMultiplier({ model: 'qwen3-coder-flash', tokenType: 'prompt' })).toBe(
tokenValues['qwen3-coder-flash'].prompt,
);
expect(getMultiplier({ model: 'qwen3-coder-flash', tokenType: 'completion' })).toBe(
tokenValues['qwen3-coder-flash'].completion,
);
});
it('should return correct pricing for qwen3-next-80b-a3b', () => {
expect(getMultiplier({ model: 'qwen3-next-80b-a3b', tokenType: 'prompt' })).toBe(
tokenValues['qwen3-next-80b-a3b'].prompt,
);
expect(getMultiplier({ model: 'qwen3-next-80b-a3b', tokenType: 'completion' })).toBe(
tokenValues['qwen3-next-80b-a3b'].completion,
);
});
});
describe('Qwen3 Model Variations', () => {
it('should handle all qwen3 models with provider prefixes', () => {
const models = ['qwen3', 'qwen3-8b', 'qwen3-max', 'qwen3-coder', 'qwen3-vl-8b-instruct'];
const prefixes = ['alibaba', 'qwen', 'openrouter'];
models.forEach((model) => {
prefixes.forEach((prefix) => {
const fullModel = `${prefix}/${model}`;
expect(getMultiplier({ model: fullModel, tokenType: 'prompt' })).toBe(
tokenValues[model].prompt,
);
expect(getMultiplier({ model: fullModel, tokenType: 'completion' })).toBe(
tokenValues[model].completion,
);
});
});
});
it('should handle qwen3-4b falling back to qwen3 base pattern', () => {
const testCases = ['qwen3-4b', 'alibaba/qwen3-4b', 'qwen/qwen3-4b-preview'];
testCases.forEach((model) => {
expect(getMultiplier({ model, tokenType: 'prompt' })).toBe(tokenValues['qwen3'].prompt);
expect(getMultiplier({ model, tokenType: 'completion' })).toBe(
tokenValues['qwen3'].completion,
);
});
});
});
});
describe('getCacheMultiplier', () => {
it('should return the correct cache multiplier for a given valueKey and cacheType', () => {
expect(getCacheMultiplier({ valueKey: 'claude-3-5-sonnet', cacheType: 'write' })).toBe(
cacheTokenValues['claude-3-5-sonnet'].write,
);
expect(getCacheMultiplier({ valueKey: 'claude-3-5-sonnet', cacheType: 'read' })).toBe(
cacheTokenValues['claude-3-5-sonnet'].read,
);
expect(getCacheMultiplier({ valueKey: 'claude-3-5-haiku', cacheType: 'write' })).toBe(
cacheTokenValues['claude-3-5-haiku'].write,
);
expect(getCacheMultiplier({ valueKey: 'claude-3-5-haiku', cacheType: 'read' })).toBe(
cacheTokenValues['claude-3-5-haiku'].read,
);
expect(getCacheMultiplier({ valueKey: 'claude-3-haiku', cacheType: 'write' })).toBe(
cacheTokenValues['claude-3-haiku'].write,
);
expect(getCacheMultiplier({ valueKey: 'claude-3-haiku', cacheType: 'read' })).toBe(
cacheTokenValues['claude-3-haiku'].read,
);
});
it('should return null if cacheType is provided but not found in cacheTokenValues', () => {
expect(
getCacheMultiplier({ valueKey: 'claude-3-5-sonnet', cacheType: 'unknownType' }),
).toBeNull();
});
it('should derive the valueKey from the model if not provided', () => {
expect(getCacheMultiplier({ cacheType: 'write', model: 'claude-3-5-sonnet-20240620' })).toBe(
cacheTokenValues['claude-3-5-sonnet'].write,
);
expect(getCacheMultiplier({ cacheType: 'read', model: 'claude-3-haiku-20240307' })).toBe(
cacheTokenValues['claude-3-haiku'].read,
);
});
it('should return null if only model or cacheType is missing', () => {
expect(getCacheMultiplier({ cacheType: 'write' })).toBeNull();
expect(getCacheMultiplier({ model: 'claude-3-5-sonnet' })).toBeNull();
});
it('should return null if derived valueKey does not match any known patterns', () => {
expect(getCacheMultiplier({ cacheType: 'write', model: 'gpt-4-some-other-info' })).toBeNull();
});
it('should handle endpointTokenConfig if provided', () => {
const endpointTokenConfig = {
'custom-model': {
write: 5,
read: 1,
},
};
expect(
getCacheMultiplier({ model: 'custom-model', cacheType: 'write', endpointTokenConfig }),
).toBe(endpointTokenConfig['custom-model'].write);
expect(
getCacheMultiplier({ model: 'custom-model', cacheType: 'read', endpointTokenConfig }),
).toBe(endpointTokenConfig['custom-model'].read);
});
it('should return null if model is not found in endpointTokenConfig', () => {
const endpointTokenConfig = {
'custom-model': {
write: 5,
read: 1,
},
};
expect(
getCacheMultiplier({ model: 'unknown-model', cacheType: 'write', endpointTokenConfig }),
).toBeNull();
});
it('should handle models with "bedrock/" prefix', () => {
expect(
getCacheMultiplier({
model: 'bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0',
cacheType: 'write',
}),
).toBe(cacheTokenValues['claude-3-5-sonnet'].write);
expect(
getCacheMultiplier({
model: 'bedrock/anthropic.claude-3-haiku-20240307-v1:0',
cacheType: 'read',
}),
).toBe(cacheTokenValues['claude-3-haiku'].read);
});
});
describe('Google Model Tests', () => {
const googleModels = [
'gemini-3',
'gemini-2.5-pro',
'gemini-2.5-flash',
'gemini-2.5-flash-lite',
'gemini-2.5-pro-preview-05-06',
'gemini-2.5-flash-preview-04-17',
'gemini-2.5-exp',
'gemini-2.0-flash-lite-preview-02-05',
'gemini-2.0-flash-001',
'gemini-2.0-flash-exp',
'gemini-2.0-pro-exp-02-05',
'gemini-1.5-flash-8b',
'gemini-1.5-flash-thinking',
'gemini-1.5-pro-latest',
'gemini-1.5-pro-preview-0409',
'gemini-pro-vision',
'gemini-1.0',
'gemini-pro',
];
it('should return the correct prompt and completion rates for all models', () => {
const results = googleModels.map((model) => {
const valueKey = getValueKey(model, EModelEndpoint.google);
const promptRate = getMultiplier({
model,
tokenType: 'prompt',
endpoint: EModelEndpoint.google,
});
const completionRate = getMultiplier({
model,
tokenType: 'completion',
endpoint: EModelEndpoint.google,
});
return { model, valueKey, promptRate, completionRate };
});
results.forEach(({ valueKey, promptRate, completionRate }) => {
expect(promptRate).toBe(tokenValues[valueKey].prompt);
expect(completionRate).toBe(tokenValues[valueKey].completion);
});
});
it('should map to the correct model keys', () => {
const expected = {
'gemini-3': 'gemini-3',
'gemini-2.5-pro': 'gemini-2.5-pro',
'gemini-2.5-flash': 'gemini-2.5-flash',
'gemini-2.5-flash-lite': 'gemini-2.5-flash-lite',
'gemini-2.5-pro-preview-05-06': 'gemini-2.5-pro',
'gemini-2.5-flash-preview-04-17': 'gemini-2.5-flash',
'gemini-2.5-exp': 'gemini-2.5',
'gemini-2.0-flash-lite-preview-02-05': 'gemini-2.0-flash-lite',
'gemini-2.0-flash-001': 'gemini-2.0-flash',
'gemini-2.0-flash-exp': 'gemini-2.0-flash',
'gemini-2.0-pro-exp-02-05': 'gemini-2.0',
'gemini-1.5-flash-8b': 'gemini-1.5-flash-8b',
'gemini-1.5-flash-thinking': 'gemini-1.5-flash',
'gemini-1.5-pro-latest': 'gemini-1.5',
'gemini-1.5-pro-preview-0409': 'gemini-1.5',
'gemini-pro-vision': 'gemini-pro-vision',
'gemini-1.0': 'gemini',
'gemini-pro': 'gemini',
};
Object.entries(expected).forEach(([model, expectedKey]) => {
const valueKey = getValueKey(model, EModelEndpoint.google);
expect(valueKey).toBe(expectedKey);
});
});
it('should handle model names with different formats', () => {
const testCases = [
{ input: 'google/gemini-pro', expected: 'gemini' },
{ input: 'gemini-pro/google', expected: 'gemini' },
{ input: 'google/gemini-2.0-flash-lite', expected: 'gemini-2.0-flash-lite' },
];
testCases.forEach(({ input, expected }) => {
const valueKey = getValueKey(input, EModelEndpoint.google);
expect(valueKey).toBe(expected);
expect(
getMultiplier({ model: input, tokenType: 'prompt', endpoint: EModelEndpoint.google }),
).toBe(tokenValues[expected].prompt);
expect(
getMultiplier({ model: input, tokenType: 'completion', endpoint: EModelEndpoint.google }),
).toBe(tokenValues[expected].completion);
});
});
});
describe('Grok Model Tests - Pricing', () => {
describe('getMultiplier', () => {
test('should return correct prompt and completion rates for Grok vision models', () => {
const models = ['grok-2-vision-1212', 'grok-2-vision', 'grok-2-vision-latest'];
models.forEach((model) => {
expect(getMultiplier({ model, tokenType: 'prompt' })).toBe(
tokenValues['grok-2-vision'].prompt,
);
expect(getMultiplier({ model, tokenType: 'completion' })).toBe(
tokenValues['grok-2-vision'].completion,
);
});
});
test('should return correct prompt and completion rates for Grok text models', () => {
const models = ['grok-2-1212', 'grok-2', 'grok-2-latest'];
models.forEach((model) => {
expect(getMultiplier({ model, tokenType: 'prompt' })).toBe(tokenValues['grok-2'].prompt);
expect(getMultiplier({ model, tokenType: 'completion' })).toBe(
tokenValues['grok-2'].completion,
);
});
});
test('should return correct prompt and completion rates for Grok beta models', () => {
expect(getMultiplier({ model: 'grok-vision-beta', tokenType: 'prompt' })).toBe(
tokenValues['grok-vision-beta'].prompt,
);
expect(getMultiplier({ model: 'grok-vision-beta', tokenType: 'completion' })).toBe(
tokenValues['grok-vision-beta'].completion,
);
expect(getMultiplier({ model: 'grok-beta', tokenType: 'prompt' })).toBe(
tokenValues['grok-beta'].prompt,
);
expect(getMultiplier({ model: 'grok-beta', tokenType: 'completion' })).toBe(
tokenValues['grok-beta'].completion,
);
});
🤖 refactor: Improve Agents Memory Usage, Bump Keyv, Grok 3 (#6850) * chore: remove unused redis file * chore: bump keyv dependencies, and update related imports * refactor: Implement IoRedis client for rate limiting across middleware, as node-redis via keyv not compatible * fix: Set max listeners to expected amount * WIP: memory improvements * refactor: Simplify getAbortData assignment in createAbortController * refactor: Update getAbortData to use WeakRef for content management * WIP: memory improvements in agent chat requests * refactor: Enhance memory management with finalization registry and cleanup functions * refactor: Simplify domainParser calls by removing unnecessary request parameter * refactor: Update parameter types for action tools and agent loading functions to use minimal configs * refactor: Simplify domainParser tests by removing unnecessary request parameter * refactor: Simplify domainParser call by removing unnecessary request parameter * refactor: Enhance client disposal by nullifying additional properties to improve memory management * refactor: Improve title generation by adding abort controller and timeout handling, consolidate request cleanup * refactor: Update checkIdleConnections to skip current user when checking for idle connections if passed * refactor: Update createMCPTool to derive userId from config and handle abort signals * refactor: Introduce createTokenCounter function and update tokenCounter usage; enhance disposeClient to reset Graph values * refactor: Update getMCPManager to accept userId parameter for improved idle connection handling * refactor: Extract logToolError function for improved error handling in AgentClient * refactor: Update disposeClient to clear handlerRegistry and graphRunnable references in client.run * refactor: Extract createHandleNewToken function to streamline token handling in initializeClient * chore: bump @librechat/agents * refactor: Improve timeout handling in addTitle function for better error management * refactor: Introduce createFetch instead of using class method * refactor: Enhance client disposal and request data handling in AskController and EditController * refactor: Update import statements for AnthropicClient and OpenAIClient to use specific paths * refactor: Use WeakRef for response handling in SplitStreamHandler to prevent memory leaks * refactor: Simplify client disposal and rename getReqData to processReqData in AskController and EditController * refactor: Improve logging structure and parameter handling in OpenAIClient * refactor: Remove unused GraphEvents and improve stream event handling in AnthropicClient and OpenAIClient * refactor: Simplify client initialization in AskController and EditController * refactor: Remove unused mock functions and implement in-memory store for KeyvMongo * chore: Update dependencies in package-lock.json to latest versions * refactor: Await token usage recording in OpenAIClient to ensure proper async handling * refactor: Remove handleAbort route from multiple endpoints and enhance client disposal logic * refactor: Enhance abort controller logic by managing abortKey more effectively * refactor: Add newConversation handling in useEventHandlers for improved conversation management * fix: dropparams * refactor: Use optional chaining for safer access to request properties in BaseClient * refactor: Move client disposal and request data processing logic to cleanup module for better organization * refactor: Remove aborted request check from addTitle function for cleaner logic * feat: Add Grok 3 model pricing and update tests for new models * chore: Remove trace warnings and inspect flags from backend start script used for debugging * refactor: Replace user identifier handling with userId for consistency across controllers, use UserId in clientRegistry * refactor: Enhance client disposal logic to prevent memory leaks by clearing additional references * chore: Update @librechat/agents to version 2.4.14 in package.json and package-lock.json
2025-04-12 18:46:36 -04:00
test('should return correct prompt and completion rates for Grok 3 models', () => {
expect(getMultiplier({ model: 'grok-3', tokenType: 'prompt' })).toBe(
tokenValues['grok-3'].prompt,
);
expect(getMultiplier({ model: 'grok-3', tokenType: 'completion' })).toBe(
tokenValues['grok-3'].completion,
);
expect(getMultiplier({ model: 'grok-3-fast', tokenType: 'prompt' })).toBe(
tokenValues['grok-3-fast'].prompt,
);
expect(getMultiplier({ model: 'grok-3-fast', tokenType: 'completion' })).toBe(
tokenValues['grok-3-fast'].completion,
);
expect(getMultiplier({ model: 'grok-3-mini', tokenType: 'prompt' })).toBe(
tokenValues['grok-3-mini'].prompt,
);
expect(getMultiplier({ model: 'grok-3-mini', tokenType: 'completion' })).toBe(
tokenValues['grok-3-mini'].completion,
);
expect(getMultiplier({ model: 'grok-3-mini-fast', tokenType: 'prompt' })).toBe(
tokenValues['grok-3-mini-fast'].prompt,
);
expect(getMultiplier({ model: 'grok-3-mini-fast', tokenType: 'completion' })).toBe(
tokenValues['grok-3-mini-fast'].completion,
);
🤖 refactor: Improve Agents Memory Usage, Bump Keyv, Grok 3 (#6850) * chore: remove unused redis file * chore: bump keyv dependencies, and update related imports * refactor: Implement IoRedis client for rate limiting across middleware, as node-redis via keyv not compatible * fix: Set max listeners to expected amount * WIP: memory improvements * refactor: Simplify getAbortData assignment in createAbortController * refactor: Update getAbortData to use WeakRef for content management * WIP: memory improvements in agent chat requests * refactor: Enhance memory management with finalization registry and cleanup functions * refactor: Simplify domainParser calls by removing unnecessary request parameter * refactor: Update parameter types for action tools and agent loading functions to use minimal configs * refactor: Simplify domainParser tests by removing unnecessary request parameter * refactor: Simplify domainParser call by removing unnecessary request parameter * refactor: Enhance client disposal by nullifying additional properties to improve memory management * refactor: Improve title generation by adding abort controller and timeout handling, consolidate request cleanup * refactor: Update checkIdleConnections to skip current user when checking for idle connections if passed * refactor: Update createMCPTool to derive userId from config and handle abort signals * refactor: Introduce createTokenCounter function and update tokenCounter usage; enhance disposeClient to reset Graph values * refactor: Update getMCPManager to accept userId parameter for improved idle connection handling * refactor: Extract logToolError function for improved error handling in AgentClient * refactor: Update disposeClient to clear handlerRegistry and graphRunnable references in client.run * refactor: Extract createHandleNewToken function to streamline token handling in initializeClient * chore: bump @librechat/agents * refactor: Improve timeout handling in addTitle function for better error management * refactor: Introduce createFetch instead of using class method * refactor: Enhance client disposal and request data handling in AskController and EditController * refactor: Update import statements for AnthropicClient and OpenAIClient to use specific paths * refactor: Use WeakRef for response handling in SplitStreamHandler to prevent memory leaks * refactor: Simplify client disposal and rename getReqData to processReqData in AskController and EditController * refactor: Improve logging structure and parameter handling in OpenAIClient * refactor: Remove unused GraphEvents and improve stream event handling in AnthropicClient and OpenAIClient * refactor: Simplify client initialization in AskController and EditController * refactor: Remove unused mock functions and implement in-memory store for KeyvMongo * chore: Update dependencies in package-lock.json to latest versions * refactor: Await token usage recording in OpenAIClient to ensure proper async handling * refactor: Remove handleAbort route from multiple endpoints and enhance client disposal logic * refactor: Enhance abort controller logic by managing abortKey more effectively * refactor: Add newConversation handling in useEventHandlers for improved conversation management * fix: dropparams * refactor: Use optional chaining for safer access to request properties in BaseClient * refactor: Move client disposal and request data processing logic to cleanup module for better organization * refactor: Remove aborted request check from addTitle function for cleaner logic * feat: Add Grok 3 model pricing and update tests for new models * chore: Remove trace warnings and inspect flags from backend start script used for debugging * refactor: Replace user identifier handling with userId for consistency across controllers, use UserId in clientRegistry * refactor: Enhance client disposal logic to prevent memory leaks by clearing additional references * chore: Update @librechat/agents to version 2.4.14 in package.json and package-lock.json
2025-04-12 18:46:36 -04:00
});
test('should return correct prompt and completion rates for Grok 4 model', () => {
expect(getMultiplier({ model: 'grok-4-0709', tokenType: 'prompt' })).toBe(
tokenValues['grok-4'].prompt,
);
expect(getMultiplier({ model: 'grok-4-0709', tokenType: 'completion' })).toBe(
tokenValues['grok-4'].completion,
);
});
test('should return correct prompt and completion rates for Grok 4 Fast model', () => {
expect(getMultiplier({ model: 'grok-4-fast', tokenType: 'prompt' })).toBe(
tokenValues['grok-4-fast'].prompt,
);
expect(getMultiplier({ model: 'grok-4-fast', tokenType: 'completion' })).toBe(
tokenValues['grok-4-fast'].completion,
);
});
test('should return correct prompt and completion rates for Grok 4.1 Fast models', () => {
expect(getMultiplier({ model: 'grok-4-1-fast-reasoning', tokenType: 'prompt' })).toBe(
tokenValues['grok-4-1-fast'].prompt,
);
expect(getMultiplier({ model: 'grok-4-1-fast-reasoning', tokenType: 'completion' })).toBe(
tokenValues['grok-4-1-fast'].completion,
);
expect(getMultiplier({ model: 'grok-4-1-fast-non-reasoning', tokenType: 'prompt' })).toBe(
tokenValues['grok-4-1-fast'].prompt,
);
expect(getMultiplier({ model: 'grok-4-1-fast-non-reasoning', tokenType: 'completion' })).toBe(
tokenValues['grok-4-1-fast'].completion,
);
});
test('should return correct prompt and completion rates for Grok Code Fast model', () => {
expect(getMultiplier({ model: 'grok-code-fast-1', tokenType: 'prompt' })).toBe(
tokenValues['grok-code-fast'].prompt,
);
expect(getMultiplier({ model: 'grok-code-fast-1', tokenType: 'completion' })).toBe(
tokenValues['grok-code-fast'].completion,
);
});
🤖 refactor: Improve Agents Memory Usage, Bump Keyv, Grok 3 (#6850) * chore: remove unused redis file * chore: bump keyv dependencies, and update related imports * refactor: Implement IoRedis client for rate limiting across middleware, as node-redis via keyv not compatible * fix: Set max listeners to expected amount * WIP: memory improvements * refactor: Simplify getAbortData assignment in createAbortController * refactor: Update getAbortData to use WeakRef for content management * WIP: memory improvements in agent chat requests * refactor: Enhance memory management with finalization registry and cleanup functions * refactor: Simplify domainParser calls by removing unnecessary request parameter * refactor: Update parameter types for action tools and agent loading functions to use minimal configs * refactor: Simplify domainParser tests by removing unnecessary request parameter * refactor: Simplify domainParser call by removing unnecessary request parameter * refactor: Enhance client disposal by nullifying additional properties to improve memory management * refactor: Improve title generation by adding abort controller and timeout handling, consolidate request cleanup * refactor: Update checkIdleConnections to skip current user when checking for idle connections if passed * refactor: Update createMCPTool to derive userId from config and handle abort signals * refactor: Introduce createTokenCounter function and update tokenCounter usage; enhance disposeClient to reset Graph values * refactor: Update getMCPManager to accept userId parameter for improved idle connection handling * refactor: Extract logToolError function for improved error handling in AgentClient * refactor: Update disposeClient to clear handlerRegistry and graphRunnable references in client.run * refactor: Extract createHandleNewToken function to streamline token handling in initializeClient * chore: bump @librechat/agents * refactor: Improve timeout handling in addTitle function for better error management * refactor: Introduce createFetch instead of using class method * refactor: Enhance client disposal and request data handling in AskController and EditController * refactor: Update import statements for AnthropicClient and OpenAIClient to use specific paths * refactor: Use WeakRef for response handling in SplitStreamHandler to prevent memory leaks * refactor: Simplify client disposal and rename getReqData to processReqData in AskController and EditController * refactor: Improve logging structure and parameter handling in OpenAIClient * refactor: Remove unused GraphEvents and improve stream event handling in AnthropicClient and OpenAIClient * refactor: Simplify client initialization in AskController and EditController * refactor: Remove unused mock functions and implement in-memory store for KeyvMongo * chore: Update dependencies in package-lock.json to latest versions * refactor: Await token usage recording in OpenAIClient to ensure proper async handling * refactor: Remove handleAbort route from multiple endpoints and enhance client disposal logic * refactor: Enhance abort controller logic by managing abortKey more effectively * refactor: Add newConversation handling in useEventHandlers for improved conversation management * fix: dropparams * refactor: Use optional chaining for safer access to request properties in BaseClient * refactor: Move client disposal and request data processing logic to cleanup module for better organization * refactor: Remove aborted request check from addTitle function for cleaner logic * feat: Add Grok 3 model pricing and update tests for new models * chore: Remove trace warnings and inspect flags from backend start script used for debugging * refactor: Replace user identifier handling with userId for consistency across controllers, use UserId in clientRegistry * refactor: Enhance client disposal logic to prevent memory leaks by clearing additional references * chore: Update @librechat/agents to version 2.4.14 in package.json and package-lock.json
2025-04-12 18:46:36 -04:00
test('should return correct prompt and completion rates for Grok 3 models with prefixes', () => {
expect(getMultiplier({ model: 'xai/grok-3', tokenType: 'prompt' })).toBe(
tokenValues['grok-3'].prompt,
);
expect(getMultiplier({ model: 'xai/grok-3', tokenType: 'completion' })).toBe(
tokenValues['grok-3'].completion,
);
expect(getMultiplier({ model: 'xai/grok-3-fast', tokenType: 'prompt' })).toBe(
tokenValues['grok-3-fast'].prompt,
);
expect(getMultiplier({ model: 'xai/grok-3-fast', tokenType: 'completion' })).toBe(
tokenValues['grok-3-fast'].completion,
);
expect(getMultiplier({ model: 'xai/grok-3-mini', tokenType: 'prompt' })).toBe(
tokenValues['grok-3-mini'].prompt,
);
expect(getMultiplier({ model: 'xai/grok-3-mini', tokenType: 'completion' })).toBe(
tokenValues['grok-3-mini'].completion,
);
expect(getMultiplier({ model: 'xai/grok-3-mini-fast', tokenType: 'prompt' })).toBe(
tokenValues['grok-3-mini-fast'].prompt,
);
expect(getMultiplier({ model: 'xai/grok-3-mini-fast', tokenType: 'completion' })).toBe(
tokenValues['grok-3-mini-fast'].completion,
);
🤖 refactor: Improve Agents Memory Usage, Bump Keyv, Grok 3 (#6850) * chore: remove unused redis file * chore: bump keyv dependencies, and update related imports * refactor: Implement IoRedis client for rate limiting across middleware, as node-redis via keyv not compatible * fix: Set max listeners to expected amount * WIP: memory improvements * refactor: Simplify getAbortData assignment in createAbortController * refactor: Update getAbortData to use WeakRef for content management * WIP: memory improvements in agent chat requests * refactor: Enhance memory management with finalization registry and cleanup functions * refactor: Simplify domainParser calls by removing unnecessary request parameter * refactor: Update parameter types for action tools and agent loading functions to use minimal configs * refactor: Simplify domainParser tests by removing unnecessary request parameter * refactor: Simplify domainParser call by removing unnecessary request parameter * refactor: Enhance client disposal by nullifying additional properties to improve memory management * refactor: Improve title generation by adding abort controller and timeout handling, consolidate request cleanup * refactor: Update checkIdleConnections to skip current user when checking for idle connections if passed * refactor: Update createMCPTool to derive userId from config and handle abort signals * refactor: Introduce createTokenCounter function and update tokenCounter usage; enhance disposeClient to reset Graph values * refactor: Update getMCPManager to accept userId parameter for improved idle connection handling * refactor: Extract logToolError function for improved error handling in AgentClient * refactor: Update disposeClient to clear handlerRegistry and graphRunnable references in client.run * refactor: Extract createHandleNewToken function to streamline token handling in initializeClient * chore: bump @librechat/agents * refactor: Improve timeout handling in addTitle function for better error management * refactor: Introduce createFetch instead of using class method * refactor: Enhance client disposal and request data handling in AskController and EditController * refactor: Update import statements for AnthropicClient and OpenAIClient to use specific paths * refactor: Use WeakRef for response handling in SplitStreamHandler to prevent memory leaks * refactor: Simplify client disposal and rename getReqData to processReqData in AskController and EditController * refactor: Improve logging structure and parameter handling in OpenAIClient * refactor: Remove unused GraphEvents and improve stream event handling in AnthropicClient and OpenAIClient * refactor: Simplify client initialization in AskController and EditController * refactor: Remove unused mock functions and implement in-memory store for KeyvMongo * chore: Update dependencies in package-lock.json to latest versions * refactor: Await token usage recording in OpenAIClient to ensure proper async handling * refactor: Remove handleAbort route from multiple endpoints and enhance client disposal logic * refactor: Enhance abort controller logic by managing abortKey more effectively * refactor: Add newConversation handling in useEventHandlers for improved conversation management * fix: dropparams * refactor: Use optional chaining for safer access to request properties in BaseClient * refactor: Move client disposal and request data processing logic to cleanup module for better organization * refactor: Remove aborted request check from addTitle function for cleaner logic * feat: Add Grok 3 model pricing and update tests for new models * chore: Remove trace warnings and inspect flags from backend start script used for debugging * refactor: Replace user identifier handling with userId for consistency across controllers, use UserId in clientRegistry * refactor: Enhance client disposal logic to prevent memory leaks by clearing additional references * chore: Update @librechat/agents to version 2.4.14 in package.json and package-lock.json
2025-04-12 18:46:36 -04:00
});
test('should return correct prompt and completion rates for Grok 4 model with prefixes', () => {
expect(getMultiplier({ model: 'xai/grok-4-0709', tokenType: 'prompt' })).toBe(
tokenValues['grok-4'].prompt,
);
expect(getMultiplier({ model: 'xai/grok-4-0709', tokenType: 'completion' })).toBe(
tokenValues['grok-4'].completion,
);
});
test('should return correct prompt and completion rates for Grok 4 Fast model with prefixes', () => {
expect(getMultiplier({ model: 'xai/grok-4-fast', tokenType: 'prompt' })).toBe(
tokenValues['grok-4-fast'].prompt,
);
expect(getMultiplier({ model: 'xai/grok-4-fast', tokenType: 'completion' })).toBe(
tokenValues['grok-4-fast'].completion,
);
});
test('should return correct prompt and completion rates for Grok 4.1 Fast models with prefixes', () => {
expect(getMultiplier({ model: 'xai/grok-4-1-fast-reasoning', tokenType: 'prompt' })).toBe(
tokenValues['grok-4-1-fast'].prompt,
);
expect(getMultiplier({ model: 'xai/grok-4-1-fast-reasoning', tokenType: 'completion' })).toBe(
tokenValues['grok-4-1-fast'].completion,
);
expect(getMultiplier({ model: 'xai/grok-4-1-fast-non-reasoning', tokenType: 'prompt' })).toBe(
tokenValues['grok-4-1-fast'].prompt,
);
expect(
getMultiplier({ model: 'xai/grok-4-1-fast-non-reasoning', tokenType: 'completion' }),
).toBe(tokenValues['grok-4-1-fast'].completion);
});
test('should return correct prompt and completion rates for Grok Code Fast model with prefixes', () => {
expect(getMultiplier({ model: 'xai/grok-code-fast-1', tokenType: 'prompt' })).toBe(
tokenValues['grok-code-fast'].prompt,
);
expect(getMultiplier({ model: 'xai/grok-code-fast-1', tokenType: 'completion' })).toBe(
tokenValues['grok-code-fast'].completion,
);
});
});
});
describe('GLM Model Tests', () => {
it('should return expected value keys for GLM models', () => {
expect(getValueKey('glm-4.6')).toBe('glm-4.6');
expect(getValueKey('glm-4.5')).toBe('glm-4.5');
expect(getValueKey('glm-4.5v')).toBe('glm-4.5v');
expect(getValueKey('glm-4.5-air')).toBe('glm-4.5-air');
expect(getValueKey('glm-4-32b')).toBe('glm-4-32b');
expect(getValueKey('glm-4')).toBe('glm-4');
expect(getValueKey('glm4')).toBe('glm4');
});
it('should match GLM model variations with provider prefixes', () => {
expect(getValueKey('z-ai/glm-4.6')).toBe('glm-4.6');
expect(getValueKey('z-ai/glm-4.5')).toBe('glm-4.5');
expect(getValueKey('z-ai/glm-4.5-air')).toBe('glm-4.5-air');
expect(getValueKey('z-ai/glm-4.5v')).toBe('glm-4.5v');
expect(getValueKey('z-ai/glm-4-32b')).toBe('glm-4-32b');
expect(getValueKey('zai/glm-4.6')).toBe('glm-4.6');
expect(getValueKey('zai/glm-4.5')).toBe('glm-4.5');
expect(getValueKey('zai/glm-4.5-air')).toBe('glm-4.5-air');
expect(getValueKey('zai/glm-4.5v')).toBe('glm-4.5v');
expect(getValueKey('zai-org/GLM-4.6')).toBe('glm-4.6');
expect(getValueKey('zai-org/GLM-4.5')).toBe('glm-4.5');
expect(getValueKey('zai-org/GLM-4.5-Air')).toBe('glm-4.5-air');
expect(getValueKey('zai-org/GLM-4.5V')).toBe('glm-4.5v');
expect(getValueKey('zai-org/GLM-4-32B-0414')).toBe('glm-4-32b');
});
it('should match GLM model variations with suffixes', () => {
expect(getValueKey('glm-4.6-fp8')).toBe('glm-4.6');
expect(getValueKey('zai-org/GLM-4.6-FP8')).toBe('glm-4.6');
expect(getValueKey('zai-org/GLM-4.5-Air-FP8')).toBe('glm-4.5-air');
});
it('should prioritize more specific GLM model patterns', () => {
expect(getValueKey('glm-4.5-air-something')).toBe('glm-4.5-air');
expect(getValueKey('glm-4.5-something')).toBe('glm-4.5');
expect(getValueKey('glm-4.5v-something')).toBe('glm-4.5v');
});
it('should return correct multipliers for all GLM models', () => {
expect(getMultiplier({ model: 'glm-4.6', tokenType: 'prompt' })).toBe(
tokenValues['glm-4.6'].prompt,
);
expect(getMultiplier({ model: 'glm-4.6', tokenType: 'completion' })).toBe(
tokenValues['glm-4.6'].completion,
);
expect(getMultiplier({ model: 'glm-4.5v', tokenType: 'prompt' })).toBe(
tokenValues['glm-4.5v'].prompt,
);
expect(getMultiplier({ model: 'glm-4.5v', tokenType: 'completion' })).toBe(
tokenValues['glm-4.5v'].completion,
);
expect(getMultiplier({ model: 'glm-4.5-air', tokenType: 'prompt' })).toBe(
tokenValues['glm-4.5-air'].prompt,
);
expect(getMultiplier({ model: 'glm-4.5-air', tokenType: 'completion' })).toBe(
tokenValues['glm-4.5-air'].completion,
);
expect(getMultiplier({ model: 'glm-4.5', tokenType: 'prompt' })).toBe(
tokenValues['glm-4.5'].prompt,
);
expect(getMultiplier({ model: 'glm-4.5', tokenType: 'completion' })).toBe(
tokenValues['glm-4.5'].completion,
);
expect(getMultiplier({ model: 'glm-4-32b', tokenType: 'prompt' })).toBe(
tokenValues['glm-4-32b'].prompt,
);
expect(getMultiplier({ model: 'glm-4-32b', tokenType: 'completion' })).toBe(
tokenValues['glm-4-32b'].completion,
);
expect(getMultiplier({ model: 'glm-4', tokenType: 'prompt' })).toBe(
tokenValues['glm-4'].prompt,
);
expect(getMultiplier({ model: 'glm-4', tokenType: 'completion' })).toBe(
tokenValues['glm-4'].completion,
);
expect(getMultiplier({ model: 'glm4', tokenType: 'prompt' })).toBe(tokenValues['glm4'].prompt);
expect(getMultiplier({ model: 'glm4', tokenType: 'completion' })).toBe(
tokenValues['glm4'].completion,
);
});
it('should return correct multipliers for GLM models with provider prefixes', () => {
expect(getMultiplier({ model: 'z-ai/glm-4.6', tokenType: 'prompt' })).toBe(
tokenValues['glm-4.6'].prompt,
);
expect(getMultiplier({ model: 'zai/glm-4.5-air', tokenType: 'completion' })).toBe(
tokenValues['glm-4.5-air'].completion,
);
expect(getMultiplier({ model: 'zai-org/GLM-4.5V', tokenType: 'prompt' })).toBe(
tokenValues['glm-4.5v'].prompt,
);
});
});
describe('Claude Model Tests', () => {
it('should return correct prompt and completion rates for Claude 4 models', () => {
expect(getMultiplier({ model: 'claude-sonnet-4', tokenType: 'prompt' })).toBe(
tokenValues['claude-sonnet-4'].prompt,
);
expect(getMultiplier({ model: 'claude-sonnet-4', tokenType: 'completion' })).toBe(
tokenValues['claude-sonnet-4'].completion,
);
expect(getMultiplier({ model: 'claude-opus-4', tokenType: 'prompt' })).toBe(
tokenValues['claude-opus-4'].prompt,
);
expect(getMultiplier({ model: 'claude-opus-4', tokenType: 'completion' })).toBe(
tokenValues['claude-opus-4'].completion,
);
});
🧮 feat: Enhance Model Pricing Coverage and Pattern Matching (#10173) * updated gpt5-pro it is here and on openrouter https://platform.openai.com/docs/models/gpt-5-pro * feat: Add gpt-5-pro pricing - Implemented handling for the new gpt-5-pro model in the getValueKey function. - Updated tests to ensure correct behavior for gpt-5-pro across various scenarios. - Adjusted token limits and multipliers for gpt-5-pro in the tokens utility files. - Enhanced model matching functionality to include gpt-5-pro variations. * refactor: optimize model pricing and validation logic - Added new model pricing entries for llama2, llama3, and qwen variants in tx.js. - Updated tokenValues to include additional models and their pricing structures. - Implemented validation tests in tx.spec.js to ensure all models resolve correctly to pricing. - Refactored getValueKey function to improve model matching and resolution efficiency. - Removed outdated model entries from tokens.ts to streamline pricing management. * fix: add missing pricing * chore: update model pricing for qwen and gemma variants * chore: update model pricing and add validation for context windows - Removed outdated model entries from tx.js and updated tokenValues with new models. - Added a test in tx.spec.js to ensure all models with pricing have corresponding context windows defined in tokens.ts. - Introduced 'command-text' model pricing in tokens.ts to maintain consistency across model definitions. * chore: update model names and pricing for AI21 and Amazon models - Refactored model names in tx.js for AI21 and Amazon models to remove versioning and improve consistency. - Updated pricing values in tokens.ts to reflect the new model names. - Added comprehensive tests in tx.spec.js to validate pricing for both short and full model names across AI21 and Amazon models. * feat: add pricing and validation for Claude Haiku 4.5 model * chore: increase default max context tokens to 18000 for agents * feat: add Qwen3 model pricing and validation tests * chore: reorganize and update Qwen model pricing in tx.js and tokens.ts --------- Co-authored-by: khfung <68192841+khfung@users.noreply.github.com>
2025-10-19 09:23:27 -04:00
it('should return correct prompt and completion rates for Claude Haiku 4.5', () => {
expect(getMultiplier({ model: 'claude-haiku-4-5', tokenType: 'prompt' })).toBe(
tokenValues['claude-haiku-4-5'].prompt,
);
expect(getMultiplier({ model: 'claude-haiku-4-5', tokenType: 'completion' })).toBe(
tokenValues['claude-haiku-4-5'].completion,
);
});
it('should return correct prompt and completion rates for Claude Opus 4.5', () => {
expect(getMultiplier({ model: 'claude-opus-4-5', tokenType: 'prompt' })).toBe(
tokenValues['claude-opus-4-5'].prompt,
);
expect(getMultiplier({ model: 'claude-opus-4-5', tokenType: 'completion' })).toBe(
tokenValues['claude-opus-4-5'].completion,
);
});
🧮 feat: Enhance Model Pricing Coverage and Pattern Matching (#10173) * updated gpt5-pro it is here and on openrouter https://platform.openai.com/docs/models/gpt-5-pro * feat: Add gpt-5-pro pricing - Implemented handling for the new gpt-5-pro model in the getValueKey function. - Updated tests to ensure correct behavior for gpt-5-pro across various scenarios. - Adjusted token limits and multipliers for gpt-5-pro in the tokens utility files. - Enhanced model matching functionality to include gpt-5-pro variations. * refactor: optimize model pricing and validation logic - Added new model pricing entries for llama2, llama3, and qwen variants in tx.js. - Updated tokenValues to include additional models and their pricing structures. - Implemented validation tests in tx.spec.js to ensure all models resolve correctly to pricing. - Refactored getValueKey function to improve model matching and resolution efficiency. - Removed outdated model entries from tokens.ts to streamline pricing management. * fix: add missing pricing * chore: update model pricing for qwen and gemma variants * chore: update model pricing and add validation for context windows - Removed outdated model entries from tx.js and updated tokenValues with new models. - Added a test in tx.spec.js to ensure all models with pricing have corresponding context windows defined in tokens.ts. - Introduced 'command-text' model pricing in tokens.ts to maintain consistency across model definitions. * chore: update model names and pricing for AI21 and Amazon models - Refactored model names in tx.js for AI21 and Amazon models to remove versioning and improve consistency. - Updated pricing values in tokens.ts to reflect the new model names. - Added comprehensive tests in tx.spec.js to validate pricing for both short and full model names across AI21 and Amazon models. * feat: add pricing and validation for Claude Haiku 4.5 model * chore: increase default max context tokens to 18000 for agents * feat: add Qwen3 model pricing and validation tests * chore: reorganize and update Qwen model pricing in tx.js and tokens.ts --------- Co-authored-by: khfung <68192841+khfung@users.noreply.github.com>
2025-10-19 09:23:27 -04:00
it('should handle Claude Haiku 4.5 model name variations', () => {
const modelVariations = [
'claude-haiku-4-5',
'claude-haiku-4-5-20250420',
'claude-haiku-4-5-latest',
'anthropic/claude-haiku-4-5',
'claude-haiku-4-5/anthropic',
'claude-haiku-4-5-preview',
];
modelVariations.forEach((model) => {
const valueKey = getValueKey(model);
expect(valueKey).toBe('claude-haiku-4-5');
expect(getMultiplier({ model, tokenType: 'prompt' })).toBe(
tokenValues['claude-haiku-4-5'].prompt,
);
expect(getMultiplier({ model, tokenType: 'completion' })).toBe(
tokenValues['claude-haiku-4-5'].completion,
);
});
});
it('should handle Claude Opus 4.5 model name variations', () => {
const modelVariations = [
'claude-opus-4-5',
'claude-opus-4-5-20250420',
'claude-opus-4-5-latest',
'anthropic/claude-opus-4-5',
'claude-opus-4-5/anthropic',
'claude-opus-4-5-preview',
];
modelVariations.forEach((model) => {
const valueKey = getValueKey(model);
expect(valueKey).toBe('claude-opus-4-5');
expect(getMultiplier({ model, tokenType: 'prompt' })).toBe(
tokenValues['claude-opus-4-5'].prompt,
);
expect(getMultiplier({ model, tokenType: 'completion' })).toBe(
tokenValues['claude-opus-4-5'].completion,
);
});
});
it('should handle Claude 4 model name variations with different prefixes and suffixes', () => {
const modelVariations = [
'claude-sonnet-4',
'claude-sonnet-4-20240229',
'claude-sonnet-4-latest',
'anthropic/claude-sonnet-4',
'claude-sonnet-4/anthropic',
'claude-sonnet-4-preview',
'claude-sonnet-4-20240229-preview',
'claude-opus-4',
'claude-opus-4-20240229',
'claude-opus-4-latest',
'anthropic/claude-opus-4',
'claude-opus-4/anthropic',
'claude-opus-4-preview',
'claude-opus-4-20240229-preview',
];
modelVariations.forEach((model) => {
const valueKey = getValueKey(model);
const isSonnet = model.includes('sonnet');
const expectedKey = isSonnet ? 'claude-sonnet-4' : 'claude-opus-4';
expect(valueKey).toBe(expectedKey);
expect(getMultiplier({ model, tokenType: 'prompt' })).toBe(tokenValues[expectedKey].prompt);
expect(getMultiplier({ model, tokenType: 'completion' })).toBe(
tokenValues[expectedKey].completion,
);
});
});
it('should return correct cache rates for Claude 4 models', () => {
expect(getCacheMultiplier({ model: 'claude-sonnet-4', cacheType: 'write' })).toBe(
cacheTokenValues['claude-sonnet-4'].write,
);
expect(getCacheMultiplier({ model: 'claude-sonnet-4', cacheType: 'read' })).toBe(
cacheTokenValues['claude-sonnet-4'].read,
);
expect(getCacheMultiplier({ model: 'claude-opus-4', cacheType: 'write' })).toBe(
cacheTokenValues['claude-opus-4'].write,
);
expect(getCacheMultiplier({ model: 'claude-opus-4', cacheType: 'read' })).toBe(
cacheTokenValues['claude-opus-4'].read,
);
});
it('should return correct cache rates for Claude Opus 4.5', () => {
expect(getCacheMultiplier({ model: 'claude-opus-4-5', cacheType: 'write' })).toBe(
cacheTokenValues['claude-opus-4-5'].write,
);
expect(getCacheMultiplier({ model: 'claude-opus-4-5', cacheType: 'read' })).toBe(
cacheTokenValues['claude-opus-4-5'].read,
);
});
it('should handle Claude 4 model cache rates with different prefixes and suffixes', () => {
const modelVariations = [
'claude-sonnet-4',
'claude-sonnet-4-20240229',
'claude-sonnet-4-latest',
'anthropic/claude-sonnet-4',
'claude-sonnet-4/anthropic',
'claude-sonnet-4-preview',
'claude-sonnet-4-20240229-preview',
'claude-opus-4',
'claude-opus-4-20240229',
'claude-opus-4-latest',
'anthropic/claude-opus-4',
'claude-opus-4/anthropic',
'claude-opus-4-preview',
'claude-opus-4-20240229-preview',
];
modelVariations.forEach((model) => {
const isSonnet = model.includes('sonnet');
const expectedKey = isSonnet ? 'claude-sonnet-4' : 'claude-opus-4';
expect(getCacheMultiplier({ model, cacheType: 'write' })).toBe(
cacheTokenValues[expectedKey].write,
);
expect(getCacheMultiplier({ model, cacheType: 'read' })).toBe(
cacheTokenValues[expectedKey].read,
);
});
});
});
🧮 feat: Enhance Model Pricing Coverage and Pattern Matching (#10173) * updated gpt5-pro it is here and on openrouter https://platform.openai.com/docs/models/gpt-5-pro * feat: Add gpt-5-pro pricing - Implemented handling for the new gpt-5-pro model in the getValueKey function. - Updated tests to ensure correct behavior for gpt-5-pro across various scenarios. - Adjusted token limits and multipliers for gpt-5-pro in the tokens utility files. - Enhanced model matching functionality to include gpt-5-pro variations. * refactor: optimize model pricing and validation logic - Added new model pricing entries for llama2, llama3, and qwen variants in tx.js. - Updated tokenValues to include additional models and their pricing structures. - Implemented validation tests in tx.spec.js to ensure all models resolve correctly to pricing. - Refactored getValueKey function to improve model matching and resolution efficiency. - Removed outdated model entries from tokens.ts to streamline pricing management. * fix: add missing pricing * chore: update model pricing for qwen and gemma variants * chore: update model pricing and add validation for context windows - Removed outdated model entries from tx.js and updated tokenValues with new models. - Added a test in tx.spec.js to ensure all models with pricing have corresponding context windows defined in tokens.ts. - Introduced 'command-text' model pricing in tokens.ts to maintain consistency across model definitions. * chore: update model names and pricing for AI21 and Amazon models - Refactored model names in tx.js for AI21 and Amazon models to remove versioning and improve consistency. - Updated pricing values in tokens.ts to reflect the new model names. - Added comprehensive tests in tx.spec.js to validate pricing for both short and full model names across AI21 and Amazon models. * feat: add pricing and validation for Claude Haiku 4.5 model * chore: increase default max context tokens to 18000 for agents * feat: add Qwen3 model pricing and validation tests * chore: reorganize and update Qwen model pricing in tx.js and tokens.ts --------- Co-authored-by: khfung <68192841+khfung@users.noreply.github.com>
2025-10-19 09:23:27 -04:00
describe('tokens.ts and tx.js sync validation', () => {
it('should resolve all models in maxTokensMap to pricing via getValueKey', () => {
const tokensKeys = Object.keys(maxTokensMap[EModelEndpoint.openAI]);
const txKeys = Object.keys(tokenValues);
const unresolved = [];
tokensKeys.forEach((key) => {
// Skip legacy token size mappings (e.g., '4k', '8k', '16k', '32k')
if (/^\d+k$/.test(key)) return;
// Skip generic pattern keys (end with '-' or ':')
if (key.endsWith('-') || key.endsWith(':')) return;
// Try to resolve via getValueKey
const resolvedKey = getValueKey(key);
// If it resolves and the resolved key has pricing, success
if (resolvedKey && txKeys.includes(resolvedKey)) return;
// If it resolves to a legacy key (4k, 8k, etc), also OK
if (resolvedKey && /^\d+k$/.test(resolvedKey)) return;
// If we get here, this model can't get pricing - flag it
unresolved.push({
key,
resolvedKey: resolvedKey || 'undefined',
context: maxTokensMap[EModelEndpoint.openAI][key],
});
});
if (unresolved.length > 0) {
console.log('\nModels that cannot resolve to pricing via getValueKey:');
unresolved.forEach(({ key, resolvedKey, context }) => {
console.log(` - '${key}' → '${resolvedKey}' (context: ${context})`);
});
}
expect(unresolved).toEqual([]);
});
it('should not have redundant dated variants with same pricing and context as base model', () => {
const txKeys = Object.keys(tokenValues);
const redundant = [];
txKeys.forEach((key) => {
// Check if this is a dated variant (ends with -YYYY-MM-DD)
if (key.match(/.*-\d{4}-\d{2}-\d{2}$/)) {
const baseKey = key.replace(/-\d{4}-\d{2}-\d{2}$/, '');
if (txKeys.includes(baseKey)) {
const variantPricing = tokenValues[key];
const basePricing = tokenValues[baseKey];
const variantContext = maxTokensMap[EModelEndpoint.openAI][key];
const baseContext = maxTokensMap[EModelEndpoint.openAI][baseKey];
const samePricing =
variantPricing.prompt === basePricing.prompt &&
variantPricing.completion === basePricing.completion;
const sameContext = variantContext === baseContext;
if (samePricing && sameContext) {
redundant.push({
key,
baseKey,
pricing: `${variantPricing.prompt}/${variantPricing.completion}`,
context: variantContext,
});
}
}
}
});
if (redundant.length > 0) {
console.log('\nRedundant dated variants found (same pricing and context as base):');
redundant.forEach(({ key, baseKey, pricing, context }) => {
console.log(` - '${key}' → '${baseKey}' (pricing: ${pricing}, context: ${context})`);
console.log(` Can be removed - pattern matching will handle it`);
});
}
expect(redundant).toEqual([]);
});
it('should have context windows in tokens.ts for all models with pricing in tx.js (openAI catch-all)', () => {
const txKeys = Object.keys(tokenValues);
const missingContext = [];
txKeys.forEach((key) => {
// Skip legacy token size mappings (4k, 8k, 16k, 32k)
if (/^\d+k$/.test(key)) return;
// Check if this model has a context window defined
const context = maxTokensMap[EModelEndpoint.openAI][key];
if (!context) {
const pricing = tokenValues[key];
missingContext.push({
key,
pricing: `${pricing.prompt}/${pricing.completion}`,
});
}
});
if (missingContext.length > 0) {
console.log('\nModels with pricing but missing context in tokens.ts:');
missingContext.forEach(({ key, pricing }) => {
console.log(` - '${key}' (pricing: ${pricing})`);
console.log(` Add to tokens.ts openAIModels/bedrockModels/etc.`);
});
}
expect(missingContext).toEqual([]);
});
});