const { maxTokensMap } = require('@librechat/api');
const { EModelEndpoint } = require('librechat-data-provider');
const {
  defaultRate,
  tokenValues,
  getValueKey,
  getMultiplier,
  cacheTokenValues,
  getCacheMultiplier,
} = require('./tx');
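
// The specs below exercise the pricing helpers exported by ./tx: getValueKey resolves a raw
// model name (optionally provider-prefixed, e.g. 'openai/gpt-4o') to a rate key, and
// getMultiplier returns the per-token rate for that key and token type, falling back to
// defaultRate when no rate is defined.
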
describe('getValueKey', () => {
  it('should return "16k" for model name containing "gpt-3.5-turbo-16k"', () => {
    expect(getValueKey('gpt-3.5-turbo-16k-some-other-info')).toBe('16k');
  });

  it('should return "4k" for model name containing "gpt-3.5"', () => {
    expect(getValueKey('gpt-3.5-some-other-info')).toBe('4k');
  });

  it('should return "32k" for model name containing "gpt-4-32k"', () => {
    expect(getValueKey('gpt-4-32k-some-other-info')).toBe('32k');
  });

  it('should return "8k" for model name containing "gpt-4"', () => {
    expect(getValueKey('gpt-4-some-other-info')).toBe('8k');
  });

  it('should return "gpt-5" for model name containing "gpt-5"', () => {
    expect(getValueKey('gpt-5-some-other-info')).toBe('gpt-5');
    expect(getValueKey('gpt-5-2025-01-30')).toBe('gpt-5');
    expect(getValueKey('gpt-5-2025-01-30-0130')).toBe('gpt-5');
    expect(getValueKey('openai/gpt-5')).toBe('gpt-5');
    expect(getValueKey('openai/gpt-5-2025-01-30')).toBe('gpt-5');
    expect(getValueKey('gpt-5-turbo')).toBe('gpt-5');
    expect(getValueKey('gpt-5-0130')).toBe('gpt-5');
  });

  it('should return "gpt-3.5-turbo-1106" for model name containing "gpt-3.5-turbo-1106"', () => {
    expect(getValueKey('gpt-3.5-turbo-1106-some-other-info')).toBe('gpt-3.5-turbo-1106');
    expect(getValueKey('openai/gpt-3.5-turbo-1106')).toBe('gpt-3.5-turbo-1106');
    expect(getValueKey('gpt-3.5-turbo-1106/openai')).toBe('gpt-3.5-turbo-1106');
  });

  it('should return "gpt-4-1106" for model name containing "gpt-4-1106"', () => {
    expect(getValueKey('gpt-4-1106-some-other-info')).toBe('gpt-4-1106');
    expect(getValueKey('gpt-4-1106-vision-preview')).toBe('gpt-4-1106');
    expect(getValueKey('gpt-4-1106-preview')).toBe('gpt-4-1106');
    expect(getValueKey('openai/gpt-4-1106')).toBe('gpt-4-1106');
    expect(getValueKey('gpt-4-1106/openai/')).toBe('gpt-4-1106');
  });

  it('should return "gpt-4-1106" for model type of "gpt-4-1106"', () => {
    expect(getValueKey('gpt-4-vision-preview')).toBe('gpt-4-1106');
    expect(getValueKey('openai/gpt-4-1106')).toBe('gpt-4-1106');
    expect(getValueKey('gpt-4-turbo')).toBe('gpt-4-1106');
    expect(getValueKey('gpt-4-0125')).toBe('gpt-4-1106');
  });

  it('should return "gpt-4.5" for model type of "gpt-4.5"', () => {
    expect(getValueKey('gpt-4.5-preview')).toBe('gpt-4.5');
    expect(getValueKey('gpt-4.5-2024-08-06')).toBe('gpt-4.5');
    expect(getValueKey('gpt-4.5-2024-08-06-0718')).toBe('gpt-4.5');
    expect(getValueKey('openai/gpt-4.5')).toBe('gpt-4.5');
    expect(getValueKey('openai/gpt-4.5-2024-08-06')).toBe('gpt-4.5');
    expect(getValueKey('gpt-4.5-turbo')).toBe('gpt-4.5');
    expect(getValueKey('gpt-4.5-0125')).toBe('gpt-4.5');
  });

  it('should return "gpt-4.1" for model type of "gpt-4.1"', () => {
    expect(getValueKey('gpt-4.1-preview')).toBe('gpt-4.1');
    expect(getValueKey('gpt-4.1-2024-08-06')).toBe('gpt-4.1');
    expect(getValueKey('gpt-4.1-2024-08-06-0718')).toBe('gpt-4.1');
    expect(getValueKey('openai/gpt-4.1')).toBe('gpt-4.1');
    expect(getValueKey('openai/gpt-4.1-2024-08-06')).toBe('gpt-4.1');
    expect(getValueKey('gpt-4.1-turbo')).toBe('gpt-4.1');
    expect(getValueKey('gpt-4.1-0125')).toBe('gpt-4.1');
  });

  it('should return "gpt-4.1-mini" for model type of "gpt-4.1-mini"', () => {
    expect(getValueKey('gpt-4.1-mini-preview')).toBe('gpt-4.1-mini');
    expect(getValueKey('gpt-4.1-mini-2024-08-06')).toBe('gpt-4.1-mini');
    expect(getValueKey('openai/gpt-4.1-mini')).toBe('gpt-4.1-mini');
    expect(getValueKey('gpt-4.1-mini-0125')).toBe('gpt-4.1-mini');
  });

  it('should return "gpt-4.1-nano" for model type of "gpt-4.1-nano"', () => {
    expect(getValueKey('gpt-4.1-nano-preview')).toBe('gpt-4.1-nano');
    expect(getValueKey('gpt-4.1-nano-2024-08-06')).toBe('gpt-4.1-nano');
    expect(getValueKey('openai/gpt-4.1-nano')).toBe('gpt-4.1-nano');
    expect(getValueKey('gpt-4.1-nano-0125')).toBe('gpt-4.1-nano');
  });

  it('should return "gpt-5" for model type of "gpt-5"', () => {
    expect(getValueKey('gpt-5-2025-01-30')).toBe('gpt-5');
    expect(getValueKey('gpt-5-2025-01-30-0130')).toBe('gpt-5');
    expect(getValueKey('openai/gpt-5')).toBe('gpt-5');
    expect(getValueKey('openai/gpt-5-2025-01-30')).toBe('gpt-5');
    expect(getValueKey('gpt-5-turbo')).toBe('gpt-5');
    expect(getValueKey('gpt-5-0130')).toBe('gpt-5');
  });

  it('should return "gpt-5-mini" for model type of "gpt-5-mini"', () => {
    expect(getValueKey('gpt-5-mini-2025-01-30')).toBe('gpt-5-mini');
    expect(getValueKey('openai/gpt-5-mini')).toBe('gpt-5-mini');
    expect(getValueKey('gpt-5-mini-0130')).toBe('gpt-5-mini');
    expect(getValueKey('gpt-5-mini-2025-01-30-0130')).toBe('gpt-5-mini');
  });

  it('should return "gpt-5-nano" for model type of "gpt-5-nano"', () => {
    expect(getValueKey('gpt-5-nano-2025-01-30')).toBe('gpt-5-nano');
    expect(getValueKey('openai/gpt-5-nano')).toBe('gpt-5-nano');
    expect(getValueKey('gpt-5-nano-0130')).toBe('gpt-5-nano');
    expect(getValueKey('gpt-5-nano-2025-01-30-0130')).toBe('gpt-5-nano');
  });

  it('should return "gpt-5-pro" for model type of "gpt-5-pro"', () => {
    expect(getValueKey('gpt-5-pro-2025-01-30')).toBe('gpt-5-pro');
    expect(getValueKey('openai/gpt-5-pro')).toBe('gpt-5-pro');
    expect(getValueKey('gpt-5-pro-0130')).toBe('gpt-5-pro');
    expect(getValueKey('gpt-5-pro-2025-01-30-0130')).toBe('gpt-5-pro');
    expect(getValueKey('gpt-5-pro-preview')).toBe('gpt-5-pro');
  });

  it('should return "gpt-4o" for model type of "gpt-4o"', () => {
    expect(getValueKey('gpt-4o-2024-08-06')).toBe('gpt-4o');
    expect(getValueKey('gpt-4o-2024-08-06-0718')).toBe('gpt-4o');
    expect(getValueKey('openai/gpt-4o')).toBe('gpt-4o');
    expect(getValueKey('openai/gpt-4o-2024-08-06')).toBe('gpt-4o');
    expect(getValueKey('gpt-4o-turbo')).toBe('gpt-4o');
    expect(getValueKey('gpt-4o-0125')).toBe('gpt-4o');
  });

  it('should return "gpt-4o-mini" for model type of "gpt-4o-mini"', () => {
    expect(getValueKey('gpt-4o-mini-2024-07-18')).toBe('gpt-4o-mini');
    expect(getValueKey('openai/gpt-4o-mini')).toBe('gpt-4o-mini');
    expect(getValueKey('gpt-4o-mini-0718')).toBe('gpt-4o-mini');
    expect(getValueKey('gpt-4o-2024-08-06-0718')).not.toBe('gpt-4o-mini');
  });

  it('should return "gpt-4o-2024-05-13" for model type of "gpt-4o-2024-05-13"', () => {
    expect(getValueKey('gpt-4o-2024-05-13')).toBe('gpt-4o-2024-05-13');
    expect(getValueKey('openai/gpt-4o-2024-05-13')).toBe('gpt-4o-2024-05-13');
    expect(getValueKey('gpt-4o-2024-05-13-0718')).toBe('gpt-4o-2024-05-13');
    expect(getValueKey('gpt-4o-2024-05-13-0718')).not.toBe('gpt-4o');
  });

  it('should return "gpt-4o" for model type of "chatgpt-4o"', () => {
    expect(getValueKey('chatgpt-4o-latest')).toBe('gpt-4o');
    expect(getValueKey('openai/chatgpt-4o-latest')).toBe('gpt-4o');
    expect(getValueKey('chatgpt-4o-latest-0916')).toBe('gpt-4o');
    expect(getValueKey('chatgpt-4o-latest-0718')).toBe('gpt-4o');
  });
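
  // Anthropic keys are tracked in both dashed ('claude-3-5-sonnet') and dotted
  // ('claude-3.5-sonnet') forms, so each spelling resolves to its own value key.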
  it('should return "claude-3-7-sonnet" for model type of "claude-3-7-sonnet-"', () => {
    expect(getValueKey('claude-3-7-sonnet-20240620')).toBe('claude-3-7-sonnet');
    expect(getValueKey('anthropic/claude-3-7-sonnet')).toBe('claude-3-7-sonnet');
    expect(getValueKey('claude-3-7-sonnet-turbo')).toBe('claude-3-7-sonnet');
    expect(getValueKey('claude-3-7-sonnet-0125')).toBe('claude-3-7-sonnet');
  });

  it('should return "claude-3.7-sonnet" for model type of "claude-3.7-sonnet-"', () => {
    expect(getValueKey('claude-3.7-sonnet-20240620')).toBe('claude-3.7-sonnet');
    expect(getValueKey('anthropic/claude-3.7-sonnet')).toBe('claude-3.7-sonnet');
    expect(getValueKey('claude-3.7-sonnet-turbo')).toBe('claude-3.7-sonnet');
    expect(getValueKey('claude-3.7-sonnet-0125')).toBe('claude-3.7-sonnet');
  });

  it('should return "claude-3-5-sonnet" for model type of "claude-3-5-sonnet-"', () => {
    expect(getValueKey('claude-3-5-sonnet-20240620')).toBe('claude-3-5-sonnet');
    expect(getValueKey('anthropic/claude-3-5-sonnet')).toBe('claude-3-5-sonnet');
    expect(getValueKey('claude-3-5-sonnet-turbo')).toBe('claude-3-5-sonnet');
    expect(getValueKey('claude-3-5-sonnet-0125')).toBe('claude-3-5-sonnet');
  });

  it('should return "claude-3.5-sonnet" for model type of "claude-3.5-sonnet-"', () => {
    expect(getValueKey('claude-3.5-sonnet-20240620')).toBe('claude-3.5-sonnet');
    expect(getValueKey('anthropic/claude-3.5-sonnet')).toBe('claude-3.5-sonnet');
    expect(getValueKey('claude-3.5-sonnet-turbo')).toBe('claude-3.5-sonnet');
    expect(getValueKey('claude-3.5-sonnet-0125')).toBe('claude-3.5-sonnet');
  });

  it('should return "claude-3-5-haiku" for model type of "claude-3-5-haiku-"', () => {
    expect(getValueKey('claude-3-5-haiku-20240620')).toBe('claude-3-5-haiku');
    expect(getValueKey('anthropic/claude-3-5-haiku')).toBe('claude-3-5-haiku');
    expect(getValueKey('claude-3-5-haiku-turbo')).toBe('claude-3-5-haiku');
    expect(getValueKey('claude-3-5-haiku-0125')).toBe('claude-3-5-haiku');
  });

  it('should return "claude-3.5-haiku" for model type of "claude-3.5-haiku-"', () => {
    expect(getValueKey('claude-3.5-haiku-20240620')).toBe('claude-3.5-haiku');
    expect(getValueKey('anthropic/claude-3.5-haiku')).toBe('claude-3.5-haiku');
    expect(getValueKey('claude-3.5-haiku-turbo')).toBe('claude-3.5-haiku');
    expect(getValueKey('claude-3.5-haiku-0125')).toBe('claude-3.5-haiku');
  });
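
  // gpt-oss models: the 20b/120b sizes (dash- or colon-separated) keep their size in the
  // value key, while other sizes fall back to the generic 'gpt-oss' key.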
  it('should return expected value keys for "gpt-oss" models', () => {
    expect(getValueKey('openai/gpt-oss-120b')).toBe('gpt-oss-120b');
    expect(getValueKey('openai/gpt-oss:120b')).toBe('gpt-oss:120b');
    expect(getValueKey('openai/gpt-oss-570b')).toBe('gpt-oss');
    expect(getValueKey('gpt-oss-570b')).toBe('gpt-oss');
    expect(getValueKey('groq/gpt-oss-1080b')).toBe('gpt-oss');
    expect(getValueKey('gpt-oss-20b')).toBe('gpt-oss-20b');
    expect(getValueKey('oai/gpt-oss:20b')).toBe('gpt-oss:20b');
  });
});
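
/**
 * Illustrative only (not part of the suite): a minimal sketch of how a caller might
 * combine these helpers to price a request. The helper name and the treatment of the
 * result as generic "cost units" are assumptions for illustration; the real transaction
 * logic lives elsewhere in the codebase.
 */
const estimateCostUnits = ({ model, promptTokens = 0, completionTokens = 0 }) => {
  // getMultiplier derives the value key from the model name when valueKey is omitted.
  const promptRate = getMultiplier({ model, tokenType: 'prompt' });
  const completionRate = getMultiplier({ model, tokenType: 'completion' });
  return promptTokens * promptRate + completionTokens * completionRate;
};
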
describe('getMultiplier', () => {
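  // getMultiplier accepts either a pre-resolved valueKey or a raw model name, plus a
  // tokenType ('prompt' or 'completion'); unknown token types fall back to defaultRate.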

  it('should return the correct multiplier for a given valueKey and tokenType', () => {
    expect(getMultiplier({ valueKey: '8k', tokenType: 'prompt' })).toBe(tokenValues['8k'].prompt);
    expect(getMultiplier({ valueKey: '8k', tokenType: 'completion' })).toBe(
      tokenValues['8k'].completion,
    );
  });

  it('should return correct multipliers for o4-mini and o3', () => {
    ['o4-mini', 'o3'].forEach((model) => {
      const prompt = getMultiplier({ model, tokenType: 'prompt' });
      const completion = getMultiplier({ model, tokenType: 'completion' });
      expect(prompt).toBe(tokenValues[model].prompt);
      expect(completion).toBe(tokenValues[model].completion);
    });
  });

  it('should return defaultRate if tokenType is provided but not found in tokenValues', () => {
    expect(getMultiplier({ valueKey: '8k', tokenType: 'unknownType' })).toBe(defaultRate);
  });

  it('should derive the valueKey from the model if not provided', () => {
    expect(getMultiplier({ tokenType: 'prompt', model: 'gpt-4-some-other-info' })).toBe(
      tokenValues['8k'].prompt,
    );
|
  });

  it('should return 1 if only model or tokenType is missing', () => {
    expect(getMultiplier({ tokenType: 'prompt' })).toBe(1);
    expect(getMultiplier({ model: 'gpt-4-some-other-info' })).toBe(1);
  });

  it('should return the correct multiplier for gpt-3.5-turbo-1106', () => {
    expect(getMultiplier({ valueKey: 'gpt-3.5-turbo-1106', tokenType: 'prompt' })).toBe(
      tokenValues['gpt-3.5-turbo-1106'].prompt,
    );
    expect(getMultiplier({ valueKey: 'gpt-3.5-turbo-1106', tokenType: 'completion' })).toBe(
      tokenValues['gpt-3.5-turbo-1106'].completion,
    );
  });

  it('should return the correct multiplier for gpt-4-1106', () => {
    expect(getMultiplier({ valueKey: 'gpt-4-1106', tokenType: 'prompt' })).toBe(
      tokenValues['gpt-4-1106'].prompt,
    );
    expect(getMultiplier({ valueKey: 'gpt-4-1106', tokenType: 'completion' })).toBe(
      tokenValues['gpt-4-1106'].completion,
    );
  });
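
  /**
   * Usage sketch (illustrative only; `promptTokens` is a hypothetical count, and the unit
   * of the rate is whatever the implementation under test defines):
   *
   *   const promptRate = getMultiplier({ model: 'gpt-4-1106-preview', tokenType: 'prompt' });
   *   const promptCost = promptTokens * promptRate;
   */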

  it('should return the correct multiplier for gpt-5', () => {
    const valueKey = getValueKey('gpt-5-2025-01-30');
    expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(tokenValues['gpt-5'].prompt);
    expect(getMultiplier({ valueKey, tokenType: 'completion' })).toBe(
      tokenValues['gpt-5'].completion,
    );
    expect(getMultiplier({ model: 'gpt-5-preview', tokenType: 'prompt' })).toBe(
      tokenValues['gpt-5'].prompt,
    );
    expect(getMultiplier({ model: 'openai/gpt-5', tokenType: 'completion' })).toBe(
      tokenValues['gpt-5'].completion,
    );
  });

  it('should return the correct multiplier for gpt-5-mini', () => {
    const valueKey = getValueKey('gpt-5-mini-2025-01-30');
    expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(tokenValues['gpt-5-mini'].prompt);
    expect(getMultiplier({ valueKey, tokenType: 'completion' })).toBe(
      tokenValues['gpt-5-mini'].completion,
    );
    expect(getMultiplier({ model: 'gpt-5-mini-preview', tokenType: 'prompt' })).toBe(
      tokenValues['gpt-5-mini'].prompt,
    );
    expect(getMultiplier({ model: 'openai/gpt-5-mini', tokenType: 'completion' })).toBe(
      tokenValues['gpt-5-mini'].completion,
    );
  });

  it('should return the correct multiplier for gpt-5-nano', () => {
    const valueKey = getValueKey('gpt-5-nano-2025-01-30');
    expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(tokenValues['gpt-5-nano'].prompt);
    expect(getMultiplier({ valueKey, tokenType: 'completion' })).toBe(
      tokenValues['gpt-5-nano'].completion,
    );
    expect(getMultiplier({ model: 'gpt-5-nano-preview', tokenType: 'prompt' })).toBe(
      tokenValues['gpt-5-nano'].prompt,
    );
    expect(getMultiplier({ model: 'openai/gpt-5-nano', tokenType: 'completion' })).toBe(
      tokenValues['gpt-5-nano'].completion,
    );
  });

  it('should return the correct multiplier for gpt-5-pro', () => {
    const valueKey = getValueKey('gpt-5-pro-2025-01-30');
    expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(tokenValues['gpt-5-pro'].prompt);
    expect(getMultiplier({ valueKey, tokenType: 'completion' })).toBe(
      tokenValues['gpt-5-pro'].completion,
    );
    expect(getMultiplier({ model: 'gpt-5-pro-preview', tokenType: 'prompt' })).toBe(
      tokenValues['gpt-5-pro'].prompt,
    );
    expect(getMultiplier({ model: 'openai/gpt-5-pro', tokenType: 'completion' })).toBe(
      tokenValues['gpt-5-pro'].completion,
    );
  });

  it('should return the correct multiplier for gpt-4o', () => {
    const valueKey = getValueKey('gpt-4o-2024-08-06');
    expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(tokenValues['gpt-4o'].prompt);
    expect(getMultiplier({ valueKey, tokenType: 'completion' })).toBe(
      tokenValues['gpt-4o'].completion,
    );
    expect(getMultiplier({ valueKey, tokenType: 'completion' })).not.toBe(
      tokenValues['gpt-4-1106'].completion,
    );
  });

  it('should return the correct multiplier for gpt-4.1', () => {
    const valueKey = getValueKey('gpt-4.1-2024-08-06');
    expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(tokenValues['gpt-4.1'].prompt);
    expect(getMultiplier({ valueKey, tokenType: 'completion' })).toBe(
      tokenValues['gpt-4.1'].completion,
    );
    expect(getMultiplier({ model: 'gpt-4.1-preview', tokenType: 'prompt' })).toBe(
      tokenValues['gpt-4.1'].prompt,
    );
    expect(getMultiplier({ model: 'openai/gpt-4.1', tokenType: 'completion' })).toBe(
      tokenValues['gpt-4.1'].completion,
    );
  });

  it('should return the correct multiplier for gpt-4.1-mini', () => {
    const valueKey = getValueKey('gpt-4.1-mini-2024-08-06');
    expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(
      tokenValues['gpt-4.1-mini'].prompt,
    );
    expect(getMultiplier({ valueKey, tokenType: 'completion' })).toBe(
      tokenValues['gpt-4.1-mini'].completion,
    );
    expect(getMultiplier({ model: 'gpt-4.1-mini-preview', tokenType: 'prompt' })).toBe(
      tokenValues['gpt-4.1-mini'].prompt,
    );
    expect(getMultiplier({ model: 'openai/gpt-4.1-mini', tokenType: 'completion' })).toBe(
      tokenValues['gpt-4.1-mini'].completion,
    );
  });

  it('should return the correct multiplier for gpt-4.1-nano', () => {
    const valueKey = getValueKey('gpt-4.1-nano-2024-08-06');
    expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(
      tokenValues['gpt-4.1-nano'].prompt,
    );
    expect(getMultiplier({ valueKey, tokenType: 'completion' })).toBe(
      tokenValues['gpt-4.1-nano'].completion,
    );
    expect(getMultiplier({ model: 'gpt-4.1-nano-preview', tokenType: 'prompt' })).toBe(
      tokenValues['gpt-4.1-nano'].prompt,
    );
    expect(getMultiplier({ model: 'openai/gpt-4.1-nano', tokenType: 'completion' })).toBe(
      tokenValues['gpt-4.1-nano'].completion,
    );
  });

  it('should return the correct multiplier for gpt-4o-mini', () => {
    const valueKey = getValueKey('gpt-4o-mini-2024-07-18');
    expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(
      tokenValues['gpt-4o-mini'].prompt,
    );
    expect(getMultiplier({ valueKey, tokenType: 'completion' })).toBe(
      tokenValues['gpt-4o-mini'].completion,
    );
    expect(getMultiplier({ valueKey, tokenType: 'completion' })).not.toBe(
      tokenValues['gpt-4-1106'].completion,
    );
  });

  it('should return the correct multiplier for chatgpt-4o-latest', () => {
    const valueKey = getValueKey('chatgpt-4o-latest');
    expect(getMultiplier({ valueKey, tokenType: 'prompt' })).toBe(tokenValues['gpt-4o'].prompt);
    expect(getMultiplier({ valueKey, tokenType: 'completion' })).toBe(
      tokenValues['gpt-4o'].completion,
    );
    expect(getMultiplier({ valueKey, tokenType: 'completion' })).not.toBe(
      tokenValues['gpt-4o-mini'].completion,
    );
  });

  it('should derive the valueKey from the model if not provided for new models', () => {
    expect(
      getMultiplier({ tokenType: 'prompt', model: 'gpt-3.5-turbo-1106-some-other-info' }),
    ).toBe(tokenValues['gpt-3.5-turbo-1106'].prompt);
    expect(getMultiplier({ tokenType: 'completion', model: 'gpt-4-1106-vision-preview' })).toBe(
      tokenValues['gpt-4-1106'].completion,
    );
    expect(getMultiplier({ tokenType: 'completion', model: 'gpt-4-0125-preview' })).toBe(
      tokenValues['gpt-4-1106'].completion,
    );
    expect(getMultiplier({ tokenType: 'completion', model: 'gpt-4-turbo-vision-preview' })).toBe(
      tokenValues['gpt-4-1106'].completion,
    );
    expect(getMultiplier({ tokenType: 'completion', model: 'gpt-3.5-turbo-0125' })).toBe(
      tokenValues['gpt-3.5-turbo-0125'].completion,
    );
  });

  it('should return defaultRate if derived valueKey does not match any known patterns', () => {
    expect(getMultiplier({ tokenType: 'prompt', model: 'gpt-10-some-other-info' })).toBe(
      defaultRate,
    );
  });

  it('should return correct multipliers for GPT-OSS models', () => {
    const models = ['gpt-oss-20b', 'gpt-oss-120b'];
    models.forEach((key) => {
      const expectedPrompt = tokenValues[key].prompt;
      const expectedCompletion = tokenValues[key].completion;
      expect(getMultiplier({ valueKey: key, tokenType: 'prompt' })).toBe(expectedPrompt);
      expect(getMultiplier({ valueKey: key, tokenType: 'completion' })).toBe(expectedCompletion);
      expect(getMultiplier({ model: key, tokenType: 'prompt' })).toBe(expectedPrompt);
      expect(getMultiplier({ model: key, tokenType: 'completion' })).toBe(expectedCompletion);
    });
  });

  it('should return correct multipliers for GLM models', () => {
    const models = ['glm-4.6', 'glm-4.5v', 'glm-4.5-air', 'glm-4.5', 'glm-4-32b', 'glm-4', 'glm4'];
    models.forEach((key) => {
      const expectedPrompt = tokenValues[key].prompt;
      const expectedCompletion = tokenValues[key].completion;
      expect(getMultiplier({ valueKey: key, tokenType: 'prompt' })).toBe(expectedPrompt);
      expect(getMultiplier({ valueKey: key, tokenType: 'completion' })).toBe(expectedCompletion);
      expect(getMultiplier({ model: key, tokenType: 'prompt' })).toBe(expectedPrompt);
      expect(getMultiplier({ model: key, tokenType: 'completion' })).toBe(expectedCompletion);
    });
  });
});
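
/**
 * Fallback sketch (an assumption consistent with the defaultRate test above): a model that
 * matches no known pattern should still yield a usable rate, e.g.
 *
 *   getMultiplier({ model: 'totally-unknown-model', tokenType: 'prompt' }) === defaultRate
 *
 * where 'totally-unknown-model' is a hypothetical name used only for illustration.
 */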

describe('AWS Bedrock Model Tests', () => {
  const awsModels = [
    'anthropic.claude-3-5-haiku-20241022-v1:0',
    'anthropic.claude-3-haiku-20240307-v1:0',
    'anthropic.claude-3-sonnet-20240229-v1:0',
    'anthropic.claude-3-opus-20240229-v1:0',
    'anthropic.claude-3-5-sonnet-20240620-v1:0',
    'anthropic.claude-v2:1',
    'anthropic.claude-instant-v1',
    'meta.llama2-13b-chat-v1',
    'meta.llama2-70b-chat-v1',
    'meta.llama3-8b-instruct-v1:0',
    'meta.llama3-70b-instruct-v1:0',
    'meta.llama3-1-8b-instruct-v1:0',
    'meta.llama3-1-70b-instruct-v1:0',
    'meta.llama3-1-405b-instruct-v1:0',
    'mistral.mistral-7b-instruct-v0:2',
    'mistral.mistral-small-2402-v1:0',
    'mistral.mixtral-8x7b-instruct-v0:1',
    'mistral.mistral-large-2402-v1:0',
    'mistral.mistral-large-2407-v1:0',
    'cohere.command-text-v14',
    'cohere.command-light-text-v14',
    'cohere.command-r-v1:0',
    'cohere.command-r-plus-v1:0',
    'ai21.j2-mid-v1',
    'ai21.j2-ultra-v1',
    'amazon.titan-text-lite-v1',
    'amazon.titan-text-express-v1',
    'amazon.nova-micro-v1:0',
    'amazon.nova-lite-v1:0',
    'amazon.nova-pro-v1:0',
  ];

  it('should return the correct prompt multipliers for all models', () => {
    const results = awsModels.map((model) => {
      const valueKey = getValueKey(model, EModelEndpoint.bedrock);
      const multiplier = getMultiplier({ valueKey, tokenType: 'prompt' });
      return tokenValues[valueKey].prompt && multiplier === tokenValues[valueKey].prompt;
    });
    expect(results.every(Boolean)).toBe(true);
  });

  it('should return the correct completion multipliers for all models', () => {
    const results = awsModels.map((model) => {
      const valueKey = getValueKey(model, EModelEndpoint.bedrock);
      const multiplier = getMultiplier({ valueKey, tokenType: 'completion' });
      return tokenValues[valueKey].completion && multiplier === tokenValues[valueKey].completion;
    });
    expect(results.every(Boolean)).toBe(true);
  });
});
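
/**
 * Endpoint-aware lookup sketch (mirrors the calls in the Bedrock tests above):
 *
 *   const valueKey = getValueKey('anthropic.claude-3-5-sonnet-20240620-v1:0', EModelEndpoint.bedrock);
 *   const rate = getMultiplier({ valueKey, tokenType: 'prompt' });
 *
 * The endpoint argument lets full Bedrock model IDs resolve to the keys used in tokenValues;
 * behavior is defined by the implementation under test, not by this comment.
 */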

describe('Amazon Model Tests', () => {
  describe('Amazon Nova Models', () => {
    it('should return correct pricing for nova-premier', () => {
      expect(getMultiplier({ model: 'nova-premier', tokenType: 'prompt' })).toBe(
        tokenValues['nova-premier'].prompt,
      );
      expect(getMultiplier({ model: 'nova-premier', tokenType: 'completion' })).toBe(
        tokenValues['nova-premier'].completion,
      );
      expect(getMultiplier({ model: 'amazon.nova-premier-v1:0', tokenType: 'prompt' })).toBe(
        tokenValues['nova-premier'].prompt,
      );
      expect(getMultiplier({ model: 'amazon.nova-premier-v1:0', tokenType: 'completion' })).toBe(
        tokenValues['nova-premier'].completion,
      );
    });

    it('should return correct pricing for nova-pro', () => {
      expect(getMultiplier({ model: 'nova-pro', tokenType: 'prompt' })).toBe(
        tokenValues['nova-pro'].prompt,
      );
      expect(getMultiplier({ model: 'nova-pro', tokenType: 'completion' })).toBe(
        tokenValues['nova-pro'].completion,
      );
      expect(getMultiplier({ model: 'amazon.nova-pro-v1:0', tokenType: 'prompt' })).toBe(
        tokenValues['nova-pro'].prompt,
      );
      expect(getMultiplier({ model: 'amazon.nova-pro-v1:0', tokenType: 'completion' })).toBe(
        tokenValues['nova-pro'].completion,
      );
    });

    it('should return correct pricing for nova-lite', () => {
      expect(getMultiplier({ model: 'nova-lite', tokenType: 'prompt' })).toBe(
        tokenValues['nova-lite'].prompt,
      );
      expect(getMultiplier({ model: 'nova-lite', tokenType: 'completion' })).toBe(
        tokenValues['nova-lite'].completion,
      );
      expect(getMultiplier({ model: 'amazon.nova-lite-v1:0', tokenType: 'prompt' })).toBe(
        tokenValues['nova-lite'].prompt,
      );
      expect(getMultiplier({ model: 'amazon.nova-lite-v1:0', tokenType: 'completion' })).toBe(
        tokenValues['nova-lite'].completion,
      );
    });

    it('should return correct pricing for nova-micro', () => {
      expect(getMultiplier({ model: 'nova-micro', tokenType: 'prompt' })).toBe(
        tokenValues['nova-micro'].prompt,
      );
      expect(getMultiplier({ model: 'nova-micro', tokenType: 'completion' })).toBe(
        tokenValues['nova-micro'].completion,
      );
      expect(getMultiplier({ model: 'amazon.nova-micro-v1:0', tokenType: 'prompt' })).toBe(
        tokenValues['nova-micro'].prompt,
      );
      expect(getMultiplier({ model: 'amazon.nova-micro-v1:0', tokenType: 'completion' })).toBe(
        tokenValues['nova-micro'].completion,
      );
    });

    it('should match both short and full model names to the same pricing', () => {
      const models = ['nova-micro', 'nova-lite', 'nova-pro', 'nova-premier'];
      const fullModels = [
        'amazon.nova-micro-v1:0',
        'amazon.nova-lite-v1:0',
        'amazon.nova-pro-v1:0',
        'amazon.nova-premier-v1:0',
      ];

      models.forEach((shortModel, i) => {
        const fullModel = fullModels[i];
        const shortPrompt = getMultiplier({ model: shortModel, tokenType: 'prompt' });
        const fullPrompt = getMultiplier({ model: fullModel, tokenType: 'prompt' });
        const shortCompletion = getMultiplier({ model: shortModel, tokenType: 'completion' });
        const fullCompletion = getMultiplier({ model: fullModel, tokenType: 'completion' });

        expect(shortPrompt).toBe(fullPrompt);
        expect(shortCompletion).toBe(fullCompletion);
        expect(shortPrompt).toBe(tokenValues[shortModel].prompt);
        expect(shortCompletion).toBe(tokenValues[shortModel].completion);
      });
    });
  });

  describe('Amazon Titan Models', () => {
    it('should return correct pricing for titan-text-premier', () => {
      expect(getMultiplier({ model: 'titan-text-premier', tokenType: 'prompt' })).toBe(
        tokenValues['titan-text-premier'].prompt,
      );
      expect(getMultiplier({ model: 'titan-text-premier', tokenType: 'completion' })).toBe(
        tokenValues['titan-text-premier'].completion,
      );
      expect(getMultiplier({ model: 'amazon.titan-text-premier-v1:0', tokenType: 'prompt' })).toBe(
        tokenValues['titan-text-premier'].prompt,
      );
      expect(
        getMultiplier({ model: 'amazon.titan-text-premier-v1:0', tokenType: 'completion' }),
      ).toBe(tokenValues['titan-text-premier'].completion);
    });

    it('should return correct pricing for titan-text-express', () => {
      expect(getMultiplier({ model: 'titan-text-express', tokenType: 'prompt' })).toBe(
        tokenValues['titan-text-express'].prompt,
      );
      expect(getMultiplier({ model: 'titan-text-express', tokenType: 'completion' })).toBe(
        tokenValues['titan-text-express'].completion,
      );
      expect(getMultiplier({ model: 'amazon.titan-text-express-v1', tokenType: 'prompt' })).toBe(
        tokenValues['titan-text-express'].prompt,
      );
      expect(
        getMultiplier({ model: 'amazon.titan-text-express-v1', tokenType: 'completion' }),
      ).toBe(tokenValues['titan-text-express'].completion);
    });

    it('should return correct pricing for titan-text-lite', () => {
      expect(getMultiplier({ model: 'titan-text-lite', tokenType: 'prompt' })).toBe(
        tokenValues['titan-text-lite'].prompt,
      );
      expect(getMultiplier({ model: 'titan-text-lite', tokenType: 'completion' })).toBe(
        tokenValues['titan-text-lite'].completion,
      );
      expect(getMultiplier({ model: 'amazon.titan-text-lite-v1', tokenType: 'prompt' })).toBe(
        tokenValues['titan-text-lite'].prompt,
      );
      expect(getMultiplier({ model: 'amazon.titan-text-lite-v1', tokenType: 'completion' })).toBe(
        tokenValues['titan-text-lite'].completion,
      );
    });

    it('should match both short and full model names to the same pricing', () => {
      const models = ['titan-text-lite', 'titan-text-express', 'titan-text-premier'];
      const fullModels = [
        'amazon.titan-text-lite-v1',
        'amazon.titan-text-express-v1',
        'amazon.titan-text-premier-v1:0',
      ];

      models.forEach((shortModel, i) => {
        const fullModel = fullModels[i];
        const shortPrompt = getMultiplier({ model: shortModel, tokenType: 'prompt' });
        const fullPrompt = getMultiplier({ model: fullModel, tokenType: 'prompt' });
        const shortCompletion = getMultiplier({ model: shortModel, tokenType: 'completion' });
        const fullCompletion = getMultiplier({ model: fullModel, tokenType: 'completion' });

        expect(shortPrompt).toBe(fullPrompt);
        expect(shortCompletion).toBe(fullCompletion);
        expect(shortPrompt).toBe(tokenValues[shortModel].prompt);
        expect(shortCompletion).toBe(tokenValues[shortModel].completion);
      });
    });
  });
});

describe('AI21 Model Tests', () => {
  describe('AI21 J2 Models', () => {
    it('should return correct pricing for j2-mid', () => {
      expect(getMultiplier({ model: 'j2-mid', tokenType: 'prompt' })).toBe(
        tokenValues['j2-mid'].prompt,
      );
      expect(getMultiplier({ model: 'j2-mid', tokenType: 'completion' })).toBe(
        tokenValues['j2-mid'].completion,
      );
      expect(getMultiplier({ model: 'ai21.j2-mid-v1', tokenType: 'prompt' })).toBe(
        tokenValues['j2-mid'].prompt,
      );
      expect(getMultiplier({ model: 'ai21.j2-mid-v1', tokenType: 'completion' })).toBe(
        tokenValues['j2-mid'].completion,
      );
    });

    it('should return correct pricing for j2-ultra', () => {
      expect(getMultiplier({ model: 'j2-ultra', tokenType: 'prompt' })).toBe(
        tokenValues['j2-ultra'].prompt,
      );
      expect(getMultiplier({ model: 'j2-ultra', tokenType: 'completion' })).toBe(
        tokenValues['j2-ultra'].completion,
      );
      expect(getMultiplier({ model: 'ai21.j2-ultra-v1', tokenType: 'prompt' })).toBe(
        tokenValues['j2-ultra'].prompt,
      );
      expect(getMultiplier({ model: 'ai21.j2-ultra-v1', tokenType: 'completion' })).toBe(
        tokenValues['j2-ultra'].completion,
      );
    });

    it('should match both short and full model names to the same pricing', () => {
      const models = ['j2-mid', 'j2-ultra'];
      const fullModels = ['ai21.j2-mid-v1', 'ai21.j2-ultra-v1'];

      models.forEach((shortModel, i) => {
        const fullModel = fullModels[i];
        const shortPrompt = getMultiplier({ model: shortModel, tokenType: 'prompt' });
        const fullPrompt = getMultiplier({ model: fullModel, tokenType: 'prompt' });
        const shortCompletion = getMultiplier({ model: shortModel, tokenType: 'completion' });
        const fullCompletion = getMultiplier({ model: fullModel, tokenType: 'completion' });

        expect(shortPrompt).toBe(fullPrompt);
        expect(shortCompletion).toBe(fullCompletion);
        expect(shortPrompt).toBe(tokenValues[shortModel].prompt);
        expect(shortCompletion).toBe(tokenValues[shortModel].completion);
      });
    });
  });

  describe('AI21 Jamba Models', () => {
    it('should return correct pricing for jamba-instruct', () => {
      expect(getMultiplier({ model: 'jamba-instruct', tokenType: 'prompt' })).toBe(
        tokenValues['jamba-instruct'].prompt,
      );
      expect(getMultiplier({ model: 'jamba-instruct', tokenType: 'completion' })).toBe(
        tokenValues['jamba-instruct'].completion,
      );
      expect(getMultiplier({ model: 'ai21.jamba-instruct-v1:0', tokenType: 'prompt' })).toBe(
        tokenValues['jamba-instruct'].prompt,
      );
      expect(getMultiplier({ model: 'ai21.jamba-instruct-v1:0', tokenType: 'completion' })).toBe(
        tokenValues['jamba-instruct'].completion,
      );
    });

    it('should match both short and full model names to the same pricing', () => {
      const shortPrompt = getMultiplier({ model: 'jamba-instruct', tokenType: 'prompt' });
      const fullPrompt = getMultiplier({
        model: 'ai21.jamba-instruct-v1:0',
        tokenType: 'prompt',
      });
      const shortCompletion = getMultiplier({ model: 'jamba-instruct', tokenType: 'completion' });
      const fullCompletion = getMultiplier({
        model: 'ai21.jamba-instruct-v1:0',
        tokenType: 'completion',
      });

      expect(shortPrompt).toBe(fullPrompt);
      expect(shortCompletion).toBe(fullCompletion);
      expect(shortPrompt).toBe(tokenValues['jamba-instruct'].prompt);
      expect(shortCompletion).toBe(tokenValues['jamba-instruct'].completion);
    });
  });
});

describe('Deepseek Model Tests', () => {
  const deepseekModels = ['deepseek-chat', 'deepseek-coder', 'deepseek-reasoner', 'deepseek.r1'];

  it('should return the correct prompt multipliers for all models', () => {
    const results = deepseekModels.map((model) => {
      const valueKey = getValueKey(model);
      const multiplier = getMultiplier({ valueKey, tokenType: 'prompt' });
      return tokenValues[valueKey].prompt && multiplier === tokenValues[valueKey].prompt;
    });
    expect(results.every(Boolean)).toBe(true);
  });

  it('should return the correct completion multipliers for all models', () => {
    const results = deepseekModels.map((model) => {
      const valueKey = getValueKey(model);
      const multiplier = getMultiplier({ valueKey, tokenType: 'completion' });
      return tokenValues[valueKey].completion && multiplier === tokenValues[valueKey].completion;
    });
    expect(results.every(Boolean)).toBe(true);
  });

  it('should return the correct prompt multipliers for reasoning model', () => {
    const model = 'deepseek-reasoner';
    const valueKey = getValueKey(model);
    expect(valueKey).toBe(model);
    const multiplier = getMultiplier({ valueKey, tokenType: 'prompt' });
    const result = tokenValues[valueKey].prompt && multiplier === tokenValues[valueKey].prompt;
    expect(result).toBe(true);
  });

  it('should return correct pricing for deepseek-chat', () => {
    expect(getMultiplier({ model: 'deepseek-chat', tokenType: 'prompt' })).toBe(
      tokenValues['deepseek-chat'].prompt,
    );
    expect(getMultiplier({ model: 'deepseek-chat', tokenType: 'completion' })).toBe(
      tokenValues['deepseek-chat'].completion,
    );
    expect(tokenValues['deepseek-chat'].prompt).toBe(0.28);
    expect(tokenValues['deepseek-chat'].completion).toBe(0.42);
  });

  it('should return correct pricing for deepseek-reasoner', () => {
    expect(getMultiplier({ model: 'deepseek-reasoner', tokenType: 'prompt' })).toBe(
      tokenValues['deepseek-reasoner'].prompt,
    );
    expect(getMultiplier({ model: 'deepseek-reasoner', tokenType: 'completion' })).toBe(
      tokenValues['deepseek-reasoner'].completion,
    );
    expect(tokenValues['deepseek-reasoner'].prompt).toBe(0.28);
    expect(tokenValues['deepseek-reasoner'].completion).toBe(0.42);
  });
|
|
|
|
|
|
|
|
|
|
it('should handle DeepSeek model name variations with provider prefixes', () => {
|
|
|
|
|
const modelVariations = [
|
|
|
|
|
'deepseek/deepseek-chat',
|
|
|
|
|
'openrouter/deepseek-chat',
|
|
|
|
|
'deepseek/deepseek-reasoner',
|
|
|
|
|
];
|
|
|
|
|
|
|
|
|
|
modelVariations.forEach((model) => {
|
|
|
|
|
const promptMultiplier = getMultiplier({ model, tokenType: 'prompt' });
|
|
|
|
|
const completionMultiplier = getMultiplier({ model, tokenType: 'completion' });
|
|
|
|
|
expect(promptMultiplier).toBe(0.28);
|
|
|
|
|
expect(completionMultiplier).toBe(0.42);
|
|
|
|
|
});
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
it('should return correct cache multipliers for DeepSeek models', () => {
|
|
|
|
|
expect(getCacheMultiplier({ model: 'deepseek-chat', cacheType: 'write' })).toBe(
|
|
|
|
|
cacheTokenValues['deepseek-chat'].write,
|
|
|
|
|
);
|
|
|
|
|
expect(getCacheMultiplier({ model: 'deepseek-chat', cacheType: 'read' })).toBe(
|
|
|
|
|
cacheTokenValues['deepseek-chat'].read,
|
|
|
|
|
);
|
|
|
|
|
expect(getCacheMultiplier({ model: 'deepseek-reasoner', cacheType: 'write' })).toBe(
|
|
|
|
|
cacheTokenValues['deepseek-reasoner'].write,
|
|
|
|
|
);
|
|
|
|
|
expect(getCacheMultiplier({ model: 'deepseek-reasoner', cacheType: 'read' })).toBe(
|
|
|
|
|
cacheTokenValues['deepseek-reasoner'].read,
|
|
|
|
|
);
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
it('should return correct cache pricing values for DeepSeek models', () => {
|
|
|
|
|
expect(cacheTokenValues['deepseek-chat'].write).toBe(0.28);
|
|
|
|
|
expect(cacheTokenValues['deepseek-chat'].read).toBe(0.028);
|
|
|
|
|
expect(cacheTokenValues['deepseek-reasoner'].write).toBe(0.28);
|
|
|
|
|
expect(cacheTokenValues['deepseek-reasoner'].read).toBe(0.028);
|
|
|
|
|
expect(cacheTokenValues['deepseek'].write).toBe(0.28);
|
|
|
|
|
expect(cacheTokenValues['deepseek'].read).toBe(0.028);
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
it('should handle DeepSeek cache multipliers with model variations', () => {
|
|
|
|
|
const modelVariations = ['deepseek/deepseek-chat', 'openrouter/deepseek-reasoner'];
|
|
|
|
|
|
|
|
|
|
modelVariations.forEach((model) => {
|
|
|
|
|
const writeMultiplier = getCacheMultiplier({ model, cacheType: 'write' });
|
|
|
|
|
const readMultiplier = getCacheMultiplier({ model, cacheType: 'read' });
|
|
|
|
|
expect(writeMultiplier).toBe(0.28);
|
|
|
|
|
expect(readMultiplier).toBe(0.028);
|
|
|
|
|
});
|
|
|
|
|
});
|
2025-01-22 07:50:09 -05:00
|
|
|
});
|
|
|
|
|
|
2025-10-19 09:23:27 -04:00
|
|
|
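// Sketch (not executed by the suite): how the prompt, completion, and cache multipliers asserted
// above could be combined into a single estimate for one DeepSeek request. The usage object and
// its field names are hypothetical, for illustration only.
//
//   const usage = { promptTokens: 1200, completionTokens: 300, cacheReadTokens: 800 };
//   const estimate =
//     usage.promptTokens * getMultiplier({ model: 'deepseek-chat', tokenType: 'prompt' }) +
//     usage.completionTokens * getMultiplier({ model: 'deepseek-chat', tokenType: 'completion' }) +
//     usage.cacheReadTokens * getCacheMultiplier({ model: 'deepseek-chat', cacheType: 'read' });
//
// With the values asserted above this evaluates to 1200 * 0.28 + 300 * 0.42 + 800 * 0.028.
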
describe('Qwen3 Model Tests', () => {
  describe('Qwen3 Base Models', () => {
    it('should return correct pricing for qwen3 base pattern', () => {
      expect(getMultiplier({ model: 'qwen3', tokenType: 'prompt' })).toBe(
        tokenValues['qwen3'].prompt,
      );
      expect(getMultiplier({ model: 'qwen3', tokenType: 'completion' })).toBe(
        tokenValues['qwen3'].completion,
      );
    });

    it('should return correct pricing for qwen3-4b (falls back to qwen3)', () => {
      expect(getMultiplier({ model: 'qwen3-4b', tokenType: 'prompt' })).toBe(
        tokenValues['qwen3'].prompt,
      );
      expect(getMultiplier({ model: 'qwen3-4b', tokenType: 'completion' })).toBe(
        tokenValues['qwen3'].completion,
      );
    });

    it('should return correct pricing for qwen3-8b', () => {
      expect(getMultiplier({ model: 'qwen3-8b', tokenType: 'prompt' })).toBe(
        tokenValues['qwen3-8b'].prompt,
      );
      expect(getMultiplier({ model: 'qwen3-8b', tokenType: 'completion' })).toBe(
        tokenValues['qwen3-8b'].completion,
      );
    });

    it('should return correct pricing for qwen3-14b', () => {
      expect(getMultiplier({ model: 'qwen3-14b', tokenType: 'prompt' })).toBe(
        tokenValues['qwen3-14b'].prompt,
      );
      expect(getMultiplier({ model: 'qwen3-14b', tokenType: 'completion' })).toBe(
        tokenValues['qwen3-14b'].completion,
      );
    });

    it('should return correct pricing for qwen3-235b-a22b', () => {
      expect(getMultiplier({ model: 'qwen3-235b-a22b', tokenType: 'prompt' })).toBe(
        tokenValues['qwen3-235b-a22b'].prompt,
      );
      expect(getMultiplier({ model: 'qwen3-235b-a22b', tokenType: 'completion' })).toBe(
        tokenValues['qwen3-235b-a22b'].completion,
      );
    });

    it('should handle model name variations with provider prefixes', () => {
      const models = [
        { input: 'qwen3', expected: 'qwen3' },
        { input: 'qwen3-4b', expected: 'qwen3' },
        { input: 'qwen3-8b', expected: 'qwen3-8b' },
        { input: 'qwen3-32b', expected: 'qwen3-32b' },
      ];
      models.forEach(({ input, expected }) => {
        const withPrefix = `alibaba/${input}`;
        expect(getMultiplier({ model: withPrefix, tokenType: 'prompt' })).toBe(
          tokenValues[expected].prompt,
        );
        expect(getMultiplier({ model: withPrefix, tokenType: 'completion' })).toBe(
          tokenValues[expected].completion,
        );
      });
    });
  });

  describe('Qwen3 VL (Vision-Language) Models', () => {
    it('should return correct pricing for qwen3-vl-8b-thinking', () => {
      expect(getMultiplier({ model: 'qwen3-vl-8b-thinking', tokenType: 'prompt' })).toBe(
        tokenValues['qwen3-vl-8b-thinking'].prompt,
      );
      expect(getMultiplier({ model: 'qwen3-vl-8b-thinking', tokenType: 'completion' })).toBe(
        tokenValues['qwen3-vl-8b-thinking'].completion,
      );
    });

    it('should return correct pricing for qwen3-vl-8b-instruct', () => {
      expect(getMultiplier({ model: 'qwen3-vl-8b-instruct', tokenType: 'prompt' })).toBe(
        tokenValues['qwen3-vl-8b-instruct'].prompt,
      );
      expect(getMultiplier({ model: 'qwen3-vl-8b-instruct', tokenType: 'completion' })).toBe(
        tokenValues['qwen3-vl-8b-instruct'].completion,
      );
    });

    it('should return correct pricing for qwen3-vl-30b-a3b', () => {
      expect(getMultiplier({ model: 'qwen3-vl-30b-a3b', tokenType: 'prompt' })).toBe(
        tokenValues['qwen3-vl-30b-a3b'].prompt,
      );
      expect(getMultiplier({ model: 'qwen3-vl-30b-a3b', tokenType: 'completion' })).toBe(
        tokenValues['qwen3-vl-30b-a3b'].completion,
      );
    });

    it('should return correct pricing for qwen3-vl-235b-a22b', () => {
      expect(getMultiplier({ model: 'qwen3-vl-235b-a22b', tokenType: 'prompt' })).toBe(
        tokenValues['qwen3-vl-235b-a22b'].prompt,
      );
      expect(getMultiplier({ model: 'qwen3-vl-235b-a22b', tokenType: 'completion' })).toBe(
        tokenValues['qwen3-vl-235b-a22b'].completion,
      );
    });
  });

  describe('Qwen3 Specialized Models', () => {
    it('should return correct pricing for qwen3-max', () => {
      expect(getMultiplier({ model: 'qwen3-max', tokenType: 'prompt' })).toBe(
        tokenValues['qwen3-max'].prompt,
      );
      expect(getMultiplier({ model: 'qwen3-max', tokenType: 'completion' })).toBe(
        tokenValues['qwen3-max'].completion,
      );
    });

    it('should return correct pricing for qwen3-coder', () => {
      expect(getMultiplier({ model: 'qwen3-coder', tokenType: 'prompt' })).toBe(
        tokenValues['qwen3-coder'].prompt,
      );
      expect(getMultiplier({ model: 'qwen3-coder', tokenType: 'completion' })).toBe(
        tokenValues['qwen3-coder'].completion,
      );
    });

    it('should return correct pricing for qwen3-coder-plus', () => {
      expect(getMultiplier({ model: 'qwen3-coder-plus', tokenType: 'prompt' })).toBe(
        tokenValues['qwen3-coder-plus'].prompt,
      );
      expect(getMultiplier({ model: 'qwen3-coder-plus', tokenType: 'completion' })).toBe(
        tokenValues['qwen3-coder-plus'].completion,
      );
    });

    it('should return correct pricing for qwen3-coder-flash', () => {
      expect(getMultiplier({ model: 'qwen3-coder-flash', tokenType: 'prompt' })).toBe(
        tokenValues['qwen3-coder-flash'].prompt,
      );
      expect(getMultiplier({ model: 'qwen3-coder-flash', tokenType: 'completion' })).toBe(
        tokenValues['qwen3-coder-flash'].completion,
      );
    });

    it('should return correct pricing for qwen3-next-80b-a3b', () => {
      expect(getMultiplier({ model: 'qwen3-next-80b-a3b', tokenType: 'prompt' })).toBe(
        tokenValues['qwen3-next-80b-a3b'].prompt,
      );
      expect(getMultiplier({ model: 'qwen3-next-80b-a3b', tokenType: 'completion' })).toBe(
        tokenValues['qwen3-next-80b-a3b'].completion,
      );
    });
  });

  describe('Qwen3 Model Variations', () => {
    it('should handle all qwen3 models with provider prefixes', () => {
      const models = ['qwen3', 'qwen3-8b', 'qwen3-max', 'qwen3-coder', 'qwen3-vl-8b-instruct'];
      const prefixes = ['alibaba', 'qwen', 'openrouter'];

      models.forEach((model) => {
        prefixes.forEach((prefix) => {
          const fullModel = `${prefix}/${model}`;
          expect(getMultiplier({ model: fullModel, tokenType: 'prompt' })).toBe(
            tokenValues[model].prompt,
          );
          expect(getMultiplier({ model: fullModel, tokenType: 'completion' })).toBe(
            tokenValues[model].completion,
          );
        });
      });
    });

    it('should handle qwen3-4b falling back to qwen3 base pattern', () => {
      const testCases = ['qwen3-4b', 'alibaba/qwen3-4b', 'qwen/qwen3-4b-preview'];
      testCases.forEach((model) => {
        expect(getMultiplier({ model, tokenType: 'prompt' })).toBe(tokenValues['qwen3'].prompt);
        expect(getMultiplier({ model, tokenType: 'completion' })).toBe(
          tokenValues['qwen3'].completion,
        );
      });
    });
  });
});

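// The qwen3-4b cases above depend on pattern fallback: when there is no exact pricing key, the
// most specific key that matches the model name wins ('qwen3-8b' beats 'qwen3', and 'qwen3-4b'
// falls back to 'qwen3'). A rough, illustrative approximation of that resolution order; the
// actual logic lives in getValueKey and may differ in detail:
//
//   const resolve = (model) =>
//     Object.keys(tokenValues)
//       .filter((key) => model.includes(key))
//       .sort((a, b) => b.length - a.length)[0];
//   // resolve('alibaba/qwen3-4b') → 'qwen3'; resolve('qwen3-8b') → 'qwen3-8b'
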
describe('getCacheMultiplier', () => {
  it('should return the correct cache multiplier for a given valueKey and cacheType', () => {
    expect(getCacheMultiplier({ valueKey: 'claude-3-5-sonnet', cacheType: 'write' })).toBe(
      cacheTokenValues['claude-3-5-sonnet'].write,
    );
    expect(getCacheMultiplier({ valueKey: 'claude-3-5-sonnet', cacheType: 'read' })).toBe(
      cacheTokenValues['claude-3-5-sonnet'].read,
    );
    expect(getCacheMultiplier({ valueKey: 'claude-3-5-haiku', cacheType: 'write' })).toBe(
      cacheTokenValues['claude-3-5-haiku'].write,
    );
    expect(getCacheMultiplier({ valueKey: 'claude-3-5-haiku', cacheType: 'read' })).toBe(
      cacheTokenValues['claude-3-5-haiku'].read,
    );
    expect(getCacheMultiplier({ valueKey: 'claude-3-haiku', cacheType: 'write' })).toBe(
      cacheTokenValues['claude-3-haiku'].write,
    );
    expect(getCacheMultiplier({ valueKey: 'claude-3-haiku', cacheType: 'read' })).toBe(
      cacheTokenValues['claude-3-haiku'].read,
    );
  });

  it('should return null if cacheType is provided but not found in cacheTokenValues', () => {
    expect(
      getCacheMultiplier({ valueKey: 'claude-3-5-sonnet', cacheType: 'unknownType' }),
    ).toBeNull();
  });

  it('should derive the valueKey from the model if not provided', () => {
    expect(getCacheMultiplier({ cacheType: 'write', model: 'claude-3-5-sonnet-20240620' })).toBe(
      cacheTokenValues['claude-3-5-sonnet'].write,
    );
    expect(getCacheMultiplier({ cacheType: 'read', model: 'claude-3-haiku-20240307' })).toBe(
      cacheTokenValues['claude-3-haiku'].read,
    );
  });

  it('should return null if only model or cacheType is missing', () => {
    expect(getCacheMultiplier({ cacheType: 'write' })).toBeNull();
    expect(getCacheMultiplier({ model: 'claude-3-5-sonnet' })).toBeNull();
  });

  it('should return null if derived valueKey does not match any known patterns', () => {
    expect(getCacheMultiplier({ cacheType: 'write', model: 'gpt-4-some-other-info' })).toBeNull();
  });

  it('should handle endpointTokenConfig if provided', () => {
    const endpointTokenConfig = {
      'custom-model': {
        write: 5,
        read: 1,
      },
    };
    expect(
      getCacheMultiplier({ model: 'custom-model', cacheType: 'write', endpointTokenConfig }),
    ).toBe(endpointTokenConfig['custom-model'].write);
    expect(
      getCacheMultiplier({ model: 'custom-model', cacheType: 'read', endpointTokenConfig }),
    ).toBe(endpointTokenConfig['custom-model'].read);
  });

  it('should return null if model is not found in endpointTokenConfig', () => {
    const endpointTokenConfig = {
      'custom-model': {
        write: 5,
        read: 1,
      },
    };
    expect(
      getCacheMultiplier({ model: 'unknown-model', cacheType: 'write', endpointTokenConfig }),
    ).toBeNull();
  });

  it('should handle models with "bedrock/" prefix', () => {
    expect(
      getCacheMultiplier({
        model: 'bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0',
        cacheType: 'write',
      }),
    ).toBe(cacheTokenValues['claude-3-5-sonnet'].write);
    expect(
      getCacheMultiplier({
        model: 'bedrock/anthropic.claude-3-haiku-20240307-v1:0',
        cacheType: 'read',
      }),
    ).toBe(cacheTokenValues['claude-3-haiku'].read);
  });
});

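// As the endpointTokenConfig tests above show, a custom endpoint can ship its own rate table that
// takes precedence over the built-in cacheTokenValues. Example shape, mirroring the object used in
// the tests (only 'write' and 'read' are exercised here; any other fields would be an assumption):
//
//   const endpointTokenConfig = { 'custom-model': { write: 5, read: 1 } };
//   getCacheMultiplier({ model: 'custom-model', cacheType: 'read', endpointTokenConfig }); // 1
//   getCacheMultiplier({ model: 'unknown-model', cacheType: 'write', endpointTokenConfig }); // null
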
describe('Google Model Tests', () => {
  const googleModels = [
    'gemini-3',
    'gemini-2.5-pro',
    'gemini-2.5-flash',
    'gemini-2.5-flash-lite',
    'gemini-2.5-pro-preview-05-06',
    'gemini-2.5-flash-preview-04-17',
    'gemini-2.5-exp',
    'gemini-2.0-flash-lite-preview-02-05',
    'gemini-2.0-flash-001',
    'gemini-2.0-flash-exp',
    'gemini-2.0-pro-exp-02-05',
    'gemini-1.5-flash-8b',
    'gemini-1.5-flash-thinking',
    'gemini-1.5-pro-latest',
    'gemini-1.5-pro-preview-0409',
    'gemini-pro-vision',
    'gemini-1.0',
    'gemini-pro',
  ];

  it('should return the correct prompt and completion rates for all models', () => {
    const results = googleModels.map((model) => {
      const valueKey = getValueKey(model, EModelEndpoint.google);
      const promptRate = getMultiplier({
        model,
        tokenType: 'prompt',
        endpoint: EModelEndpoint.google,
      });
      const completionRate = getMultiplier({
        model,
        tokenType: 'completion',
        endpoint: EModelEndpoint.google,
      });
      return { model, valueKey, promptRate, completionRate };
    });

    results.forEach(({ valueKey, promptRate, completionRate }) => {
      expect(promptRate).toBe(tokenValues[valueKey].prompt);
      expect(completionRate).toBe(tokenValues[valueKey].completion);
    });
  });

  it('should map to the correct model keys', () => {
    const expected = {
      'gemini-3': 'gemini-3',
      'gemini-2.5-pro': 'gemini-2.5-pro',
      'gemini-2.5-flash': 'gemini-2.5-flash',
      'gemini-2.5-flash-lite': 'gemini-2.5-flash-lite',
      'gemini-2.5-pro-preview-05-06': 'gemini-2.5-pro',
      'gemini-2.5-flash-preview-04-17': 'gemini-2.5-flash',
      'gemini-2.5-exp': 'gemini-2.5',
      'gemini-2.0-flash-lite-preview-02-05': 'gemini-2.0-flash-lite',
      'gemini-2.0-flash-001': 'gemini-2.0-flash',
      'gemini-2.0-flash-exp': 'gemini-2.0-flash',
      'gemini-2.0-pro-exp-02-05': 'gemini-2.0',
      'gemini-1.5-flash-8b': 'gemini-1.5-flash-8b',
      'gemini-1.5-flash-thinking': 'gemini-1.5-flash',
      'gemini-1.5-pro-latest': 'gemini-1.5',
      'gemini-1.5-pro-preview-0409': 'gemini-1.5',
      'gemini-pro-vision': 'gemini-pro-vision',
      'gemini-1.0': 'gemini',
      'gemini-pro': 'gemini',
    };

    Object.entries(expected).forEach(([model, expectedKey]) => {
      const valueKey = getValueKey(model, EModelEndpoint.google);
      expect(valueKey).toBe(expectedKey);
    });
  });

  it('should handle model names with different formats', () => {
    const testCases = [
      { input: 'google/gemini-pro', expected: 'gemini' },
      { input: 'gemini-pro/google', expected: 'gemini' },
      { input: 'google/gemini-2.0-flash-lite', expected: 'gemini-2.0-flash-lite' },
    ];

    testCases.forEach(({ input, expected }) => {
      const valueKey = getValueKey(input, EModelEndpoint.google);
      expect(valueKey).toBe(expected);
      expect(
        getMultiplier({ model: input, tokenType: 'prompt', endpoint: EModelEndpoint.google }),
      ).toBe(tokenValues[expected].prompt);
      expect(
        getMultiplier({ model: input, tokenType: 'completion', endpoint: EModelEndpoint.google }),
      ).toBe(tokenValues[expected].completion);
    });
  });
});

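// Note: Google resolution is endpoint-aware; getValueKey(model, EModelEndpoint.google) can map a
// preview or experimental name onto a broader pricing key, per the table above. For example:
//
//   getValueKey('gemini-2.0-pro-exp-02-05', EModelEndpoint.google); // → 'gemini-2.0'
//   getValueKey('gemini-1.5-pro-latest', EModelEndpoint.google); // → 'gemini-1.5'
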
describe('Grok Model Tests - Pricing', () => {
  describe('getMultiplier', () => {
    test('should return correct prompt and completion rates for Grok vision models', () => {
      const models = ['grok-2-vision-1212', 'grok-2-vision', 'grok-2-vision-latest'];
      models.forEach((model) => {
        expect(getMultiplier({ model, tokenType: 'prompt' })).toBe(
          tokenValues['grok-2-vision'].prompt,
        );
        expect(getMultiplier({ model, tokenType: 'completion' })).toBe(
          tokenValues['grok-2-vision'].completion,
        );
      });
    });

    test('should return correct prompt and completion rates for Grok text models', () => {
      const models = ['grok-2-1212', 'grok-2', 'grok-2-latest'];
      models.forEach((model) => {
        expect(getMultiplier({ model, tokenType: 'prompt' })).toBe(tokenValues['grok-2'].prompt);
        expect(getMultiplier({ model, tokenType: 'completion' })).toBe(
          tokenValues['grok-2'].completion,
        );
      });
    });

    test('should return correct prompt and completion rates for Grok beta models', () => {
      expect(getMultiplier({ model: 'grok-vision-beta', tokenType: 'prompt' })).toBe(
        tokenValues['grok-vision-beta'].prompt,
      );
      expect(getMultiplier({ model: 'grok-vision-beta', tokenType: 'completion' })).toBe(
        tokenValues['grok-vision-beta'].completion,
      );
      expect(getMultiplier({ model: 'grok-beta', tokenType: 'prompt' })).toBe(
        tokenValues['grok-beta'].prompt,
      );
      expect(getMultiplier({ model: 'grok-beta', tokenType: 'completion' })).toBe(
        tokenValues['grok-beta'].completion,
      );
    });

    test('should return correct prompt and completion rates for Grok 3 models', () => {
      expect(getMultiplier({ model: 'grok-3', tokenType: 'prompt' })).toBe(
        tokenValues['grok-3'].prompt,
      );
      expect(getMultiplier({ model: 'grok-3', tokenType: 'completion' })).toBe(
        tokenValues['grok-3'].completion,
      );
      expect(getMultiplier({ model: 'grok-3-fast', tokenType: 'prompt' })).toBe(
        tokenValues['grok-3-fast'].prompt,
      );
      expect(getMultiplier({ model: 'grok-3-fast', tokenType: 'completion' })).toBe(
        tokenValues['grok-3-fast'].completion,
      );
      expect(getMultiplier({ model: 'grok-3-mini', tokenType: 'prompt' })).toBe(
        tokenValues['grok-3-mini'].prompt,
      );
      expect(getMultiplier({ model: 'grok-3-mini', tokenType: 'completion' })).toBe(
        tokenValues['grok-3-mini'].completion,
      );
      expect(getMultiplier({ model: 'grok-3-mini-fast', tokenType: 'prompt' })).toBe(
        tokenValues['grok-3-mini-fast'].prompt,
      );
      expect(getMultiplier({ model: 'grok-3-mini-fast', tokenType: 'completion' })).toBe(
        tokenValues['grok-3-mini-fast'].completion,
      );
    });

    test('should return correct prompt and completion rates for Grok 4 model', () => {
      expect(getMultiplier({ model: 'grok-4-0709', tokenType: 'prompt' })).toBe(
        tokenValues['grok-4'].prompt,
      );
      expect(getMultiplier({ model: 'grok-4-0709', tokenType: 'completion' })).toBe(
        tokenValues['grok-4'].completion,
      );
    });

    test('should return correct prompt and completion rates for Grok 4 Fast model', () => {
      expect(getMultiplier({ model: 'grok-4-fast', tokenType: 'prompt' })).toBe(
        tokenValues['grok-4-fast'].prompt,
      );
      expect(getMultiplier({ model: 'grok-4-fast', tokenType: 'completion' })).toBe(
        tokenValues['grok-4-fast'].completion,
      );
    });

    test('should return correct prompt and completion rates for Grok 4.1 Fast models', () => {
      expect(getMultiplier({ model: 'grok-4-1-fast-reasoning', tokenType: 'prompt' })).toBe(
        tokenValues['grok-4-1-fast'].prompt,
      );
      expect(getMultiplier({ model: 'grok-4-1-fast-reasoning', tokenType: 'completion' })).toBe(
        tokenValues['grok-4-1-fast'].completion,
      );
      expect(getMultiplier({ model: 'grok-4-1-fast-non-reasoning', tokenType: 'prompt' })).toBe(
        tokenValues['grok-4-1-fast'].prompt,
      );
      expect(getMultiplier({ model: 'grok-4-1-fast-non-reasoning', tokenType: 'completion' })).toBe(
        tokenValues['grok-4-1-fast'].completion,
      );
    });

    test('should return correct prompt and completion rates for Grok Code Fast model', () => {
      expect(getMultiplier({ model: 'grok-code-fast-1', tokenType: 'prompt' })).toBe(
        tokenValues['grok-code-fast'].prompt,
      );
      expect(getMultiplier({ model: 'grok-code-fast-1', tokenType: 'completion' })).toBe(
        tokenValues['grok-code-fast'].completion,
      );
    });

    test('should return correct prompt and completion rates for Grok 3 models with prefixes', () => {
      expect(getMultiplier({ model: 'xai/grok-3', tokenType: 'prompt' })).toBe(
        tokenValues['grok-3'].prompt,
      );
      expect(getMultiplier({ model: 'xai/grok-3', tokenType: 'completion' })).toBe(
        tokenValues['grok-3'].completion,
      );
      expect(getMultiplier({ model: 'xai/grok-3-fast', tokenType: 'prompt' })).toBe(
        tokenValues['grok-3-fast'].prompt,
      );
      expect(getMultiplier({ model: 'xai/grok-3-fast', tokenType: 'completion' })).toBe(
        tokenValues['grok-3-fast'].completion,
      );
      expect(getMultiplier({ model: 'xai/grok-3-mini', tokenType: 'prompt' })).toBe(
        tokenValues['grok-3-mini'].prompt,
      );
      expect(getMultiplier({ model: 'xai/grok-3-mini', tokenType: 'completion' })).toBe(
        tokenValues['grok-3-mini'].completion,
      );
      expect(getMultiplier({ model: 'xai/grok-3-mini-fast', tokenType: 'prompt' })).toBe(
        tokenValues['grok-3-mini-fast'].prompt,
      );
      expect(getMultiplier({ model: 'xai/grok-3-mini-fast', tokenType: 'completion' })).toBe(
        tokenValues['grok-3-mini-fast'].completion,
      );
    });

    test('should return correct prompt and completion rates for Grok 4 model with prefixes', () => {
      expect(getMultiplier({ model: 'xai/grok-4-0709', tokenType: 'prompt' })).toBe(
        tokenValues['grok-4'].prompt,
      );
      expect(getMultiplier({ model: 'xai/grok-4-0709', tokenType: 'completion' })).toBe(
        tokenValues['grok-4'].completion,
      );
    });

    test('should return correct prompt and completion rates for Grok 4 Fast model with prefixes', () => {
      expect(getMultiplier({ model: 'xai/grok-4-fast', tokenType: 'prompt' })).toBe(
        tokenValues['grok-4-fast'].prompt,
      );
      expect(getMultiplier({ model: 'xai/grok-4-fast', tokenType: 'completion' })).toBe(
        tokenValues['grok-4-fast'].completion,
      );
    });

    test('should return correct prompt and completion rates for Grok 4.1 Fast models with prefixes', () => {
      expect(getMultiplier({ model: 'xai/grok-4-1-fast-reasoning', tokenType: 'prompt' })).toBe(
        tokenValues['grok-4-1-fast'].prompt,
      );
      expect(getMultiplier({ model: 'xai/grok-4-1-fast-reasoning', tokenType: 'completion' })).toBe(
        tokenValues['grok-4-1-fast'].completion,
      );
      expect(getMultiplier({ model: 'xai/grok-4-1-fast-non-reasoning', tokenType: 'prompt' })).toBe(
        tokenValues['grok-4-1-fast'].prompt,
      );
      expect(
        getMultiplier({ model: 'xai/grok-4-1-fast-non-reasoning', tokenType: 'completion' }),
      ).toBe(tokenValues['grok-4-1-fast'].completion);
    });

    test('should return correct prompt and completion rates for Grok Code Fast model with prefixes', () => {
      expect(getMultiplier({ model: 'xai/grok-code-fast-1', tokenType: 'prompt' })).toBe(
        tokenValues['grok-code-fast'].prompt,
      );
      expect(getMultiplier({ model: 'xai/grok-code-fast-1', tokenType: 'completion' })).toBe(
        tokenValues['grok-code-fast'].completion,
      );
    });
  });
});

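// The 'xai/'-prefixed cases above mirror the bare model names: the provider prefix does not
// change the resolved rate. A quick equivalence one could check in a REPL (already implied by
// the assertions above):
//
//   getMultiplier({ model: 'xai/grok-4-fast', tokenType: 'prompt' }) ===
//     getMultiplier({ model: 'grok-4-fast', tokenType: 'prompt' }); // true
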
describe('GLM Model Tests', () => {
  it('should return expected value keys for GLM models', () => {
    expect(getValueKey('glm-4.6')).toBe('glm-4.6');
    expect(getValueKey('glm-4.5')).toBe('glm-4.5');
    expect(getValueKey('glm-4.5v')).toBe('glm-4.5v');
    expect(getValueKey('glm-4.5-air')).toBe('glm-4.5-air');
    expect(getValueKey('glm-4-32b')).toBe('glm-4-32b');
    expect(getValueKey('glm-4')).toBe('glm-4');
    expect(getValueKey('glm4')).toBe('glm4');
  });

  it('should match GLM model variations with provider prefixes', () => {
    expect(getValueKey('z-ai/glm-4.6')).toBe('glm-4.6');
    expect(getValueKey('z-ai/glm-4.5')).toBe('glm-4.5');
    expect(getValueKey('z-ai/glm-4.5-air')).toBe('glm-4.5-air');
    expect(getValueKey('z-ai/glm-4.5v')).toBe('glm-4.5v');
    expect(getValueKey('z-ai/glm-4-32b')).toBe('glm-4-32b');

    expect(getValueKey('zai/glm-4.6')).toBe('glm-4.6');
    expect(getValueKey('zai/glm-4.5')).toBe('glm-4.5');
    expect(getValueKey('zai/glm-4.5-air')).toBe('glm-4.5-air');
    expect(getValueKey('zai/glm-4.5v')).toBe('glm-4.5v');

    expect(getValueKey('zai-org/GLM-4.6')).toBe('glm-4.6');
    expect(getValueKey('zai-org/GLM-4.5')).toBe('glm-4.5');
    expect(getValueKey('zai-org/GLM-4.5-Air')).toBe('glm-4.5-air');
    expect(getValueKey('zai-org/GLM-4.5V')).toBe('glm-4.5v');
    expect(getValueKey('zai-org/GLM-4-32B-0414')).toBe('glm-4-32b');
  });

  it('should match GLM model variations with suffixes', () => {
    expect(getValueKey('glm-4.6-fp8')).toBe('glm-4.6');
    expect(getValueKey('zai-org/GLM-4.6-FP8')).toBe('glm-4.6');
    expect(getValueKey('zai-org/GLM-4.5-Air-FP8')).toBe('glm-4.5-air');
  });

  it('should prioritize more specific GLM model patterns', () => {
    expect(getValueKey('glm-4.5-air-something')).toBe('glm-4.5-air');
    expect(getValueKey('glm-4.5-something')).toBe('glm-4.5');
    expect(getValueKey('glm-4.5v-something')).toBe('glm-4.5v');
  });

  it('should return correct multipliers for all GLM models', () => {
    expect(getMultiplier({ model: 'glm-4.6', tokenType: 'prompt' })).toBe(
      tokenValues['glm-4.6'].prompt,
    );
    expect(getMultiplier({ model: 'glm-4.6', tokenType: 'completion' })).toBe(
      tokenValues['glm-4.6'].completion,
    );

    expect(getMultiplier({ model: 'glm-4.5v', tokenType: 'prompt' })).toBe(
      tokenValues['glm-4.5v'].prompt,
    );
    expect(getMultiplier({ model: 'glm-4.5v', tokenType: 'completion' })).toBe(
      tokenValues['glm-4.5v'].completion,
    );

    expect(getMultiplier({ model: 'glm-4.5-air', tokenType: 'prompt' })).toBe(
      tokenValues['glm-4.5-air'].prompt,
    );
    expect(getMultiplier({ model: 'glm-4.5-air', tokenType: 'completion' })).toBe(
      tokenValues['glm-4.5-air'].completion,
    );

    expect(getMultiplier({ model: 'glm-4.5', tokenType: 'prompt' })).toBe(
      tokenValues['glm-4.5'].prompt,
    );
    expect(getMultiplier({ model: 'glm-4.5', tokenType: 'completion' })).toBe(
      tokenValues['glm-4.5'].completion,
    );

    expect(getMultiplier({ model: 'glm-4-32b', tokenType: 'prompt' })).toBe(
      tokenValues['glm-4-32b'].prompt,
    );
    expect(getMultiplier({ model: 'glm-4-32b', tokenType: 'completion' })).toBe(
      tokenValues['glm-4-32b'].completion,
    );

    expect(getMultiplier({ model: 'glm-4', tokenType: 'prompt' })).toBe(
      tokenValues['glm-4'].prompt,
    );
    expect(getMultiplier({ model: 'glm-4', tokenType: 'completion' })).toBe(
      tokenValues['glm-4'].completion,
    );

    expect(getMultiplier({ model: 'glm4', tokenType: 'prompt' })).toBe(tokenValues['glm4'].prompt);
    expect(getMultiplier({ model: 'glm4', tokenType: 'completion' })).toBe(
      tokenValues['glm4'].completion,
    );
  });

  it('should return correct multipliers for GLM models with provider prefixes', () => {
    expect(getMultiplier({ model: 'z-ai/glm-4.6', tokenType: 'prompt' })).toBe(
      tokenValues['glm-4.6'].prompt,
    );
    expect(getMultiplier({ model: 'zai/glm-4.5-air', tokenType: 'completion' })).toBe(
      tokenValues['glm-4.5-air'].completion,
    );
    expect(getMultiplier({ model: 'zai-org/GLM-4.5V', tokenType: 'prompt' })).toBe(
      tokenValues['glm-4.5v'].prompt,
    );
  });
});

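// Per the getValueKey assertions above, GLM matching is case-insensitive and tolerant of both
// provider prefixes ('z-ai/', 'zai/', 'zai-org/') and quantization suffixes like '-FP8', so all
// of the following resolve to the same pricing key:
//
//   getValueKey('glm-4.6'); // 'glm-4.6'
//   getValueKey('zai-org/GLM-4.6-FP8'); // 'glm-4.6'
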
describe('Claude Model Tests', () => {
  it('should return correct prompt and completion rates for Claude 4 models', () => {
    expect(getMultiplier({ model: 'claude-sonnet-4', tokenType: 'prompt' })).toBe(
      tokenValues['claude-sonnet-4'].prompt,
    );
    expect(getMultiplier({ model: 'claude-sonnet-4', tokenType: 'completion' })).toBe(
      tokenValues['claude-sonnet-4'].completion,
    );
    expect(getMultiplier({ model: 'claude-opus-4', tokenType: 'prompt' })).toBe(
      tokenValues['claude-opus-4'].prompt,
    );
    expect(getMultiplier({ model: 'claude-opus-4', tokenType: 'completion' })).toBe(
      tokenValues['claude-opus-4'].completion,
    );
  });

  it('should return correct prompt and completion rates for Claude Haiku 4.5', () => {
    expect(getMultiplier({ model: 'claude-haiku-4-5', tokenType: 'prompt' })).toBe(
      tokenValues['claude-haiku-4-5'].prompt,
    );
    expect(getMultiplier({ model: 'claude-haiku-4-5', tokenType: 'completion' })).toBe(
      tokenValues['claude-haiku-4-5'].completion,
    );
  });

  it('should return correct prompt and completion rates for Claude Opus 4.5', () => {
    expect(getMultiplier({ model: 'claude-opus-4-5', tokenType: 'prompt' })).toBe(
      tokenValues['claude-opus-4-5'].prompt,
    );
    expect(getMultiplier({ model: 'claude-opus-4-5', tokenType: 'completion' })).toBe(
      tokenValues['claude-opus-4-5'].completion,
    );
  });

  it('should handle Claude Haiku 4.5 model name variations', () => {
    const modelVariations = [
      'claude-haiku-4-5',
      'claude-haiku-4-5-20250420',
      'claude-haiku-4-5-latest',
      'anthropic/claude-haiku-4-5',
      'claude-haiku-4-5/anthropic',
      'claude-haiku-4-5-preview',
    ];

    modelVariations.forEach((model) => {
      const valueKey = getValueKey(model);
      expect(valueKey).toBe('claude-haiku-4-5');
      expect(getMultiplier({ model, tokenType: 'prompt' })).toBe(
        tokenValues['claude-haiku-4-5'].prompt,
      );
      expect(getMultiplier({ model, tokenType: 'completion' })).toBe(
        tokenValues['claude-haiku-4-5'].completion,
      );
    });
  });

  it('should handle Claude Opus 4.5 model name variations', () => {
    const modelVariations = [
      'claude-opus-4-5',
      'claude-opus-4-5-20250420',
      'claude-opus-4-5-latest',
      'anthropic/claude-opus-4-5',
      'claude-opus-4-5/anthropic',
      'claude-opus-4-5-preview',
    ];

    modelVariations.forEach((model) => {
      const valueKey = getValueKey(model);
      expect(valueKey).toBe('claude-opus-4-5');
      expect(getMultiplier({ model, tokenType: 'prompt' })).toBe(
        tokenValues['claude-opus-4-5'].prompt,
      );
      expect(getMultiplier({ model, tokenType: 'completion' })).toBe(
        tokenValues['claude-opus-4-5'].completion,
      );
    });
  });

  it('should handle Claude 4 model name variations with different prefixes and suffixes', () => {
    const modelVariations = [
      'claude-sonnet-4',
      'claude-sonnet-4-20240229',
      'claude-sonnet-4-latest',
      'anthropic/claude-sonnet-4',
      'claude-sonnet-4/anthropic',
      'claude-sonnet-4-preview',
      'claude-sonnet-4-20240229-preview',
      'claude-opus-4',
      'claude-opus-4-20240229',
      'claude-opus-4-latest',
      'anthropic/claude-opus-4',
      'claude-opus-4/anthropic',
      'claude-opus-4-preview',
      'claude-opus-4-20240229-preview',
    ];

    modelVariations.forEach((model) => {
      const valueKey = getValueKey(model);
      const isSonnet = model.includes('sonnet');
      const expectedKey = isSonnet ? 'claude-sonnet-4' : 'claude-opus-4';

      expect(valueKey).toBe(expectedKey);
      expect(getMultiplier({ model, tokenType: 'prompt' })).toBe(tokenValues[expectedKey].prompt);
      expect(getMultiplier({ model, tokenType: 'completion' })).toBe(
        tokenValues[expectedKey].completion,
      );
    });
  });

  it('should return correct cache rates for Claude 4 models', () => {
    expect(getCacheMultiplier({ model: 'claude-sonnet-4', cacheType: 'write' })).toBe(
      cacheTokenValues['claude-sonnet-4'].write,
    );
    expect(getCacheMultiplier({ model: 'claude-sonnet-4', cacheType: 'read' })).toBe(
      cacheTokenValues['claude-sonnet-4'].read,
    );
    expect(getCacheMultiplier({ model: 'claude-opus-4', cacheType: 'write' })).toBe(
      cacheTokenValues['claude-opus-4'].write,
    );
    expect(getCacheMultiplier({ model: 'claude-opus-4', cacheType: 'read' })).toBe(
      cacheTokenValues['claude-opus-4'].read,
    );
  });

  it('should return correct cache rates for Claude Opus 4.5', () => {
    expect(getCacheMultiplier({ model: 'claude-opus-4-5', cacheType: 'write' })).toBe(
      cacheTokenValues['claude-opus-4-5'].write,
    );
    expect(getCacheMultiplier({ model: 'claude-opus-4-5', cacheType: 'read' })).toBe(
      cacheTokenValues['claude-opus-4-5'].read,
    );
  });

  it('should handle Claude 4 model cache rates with different prefixes and suffixes', () => {
    const modelVariations = [
      'claude-sonnet-4',
      'claude-sonnet-4-20240229',
      'claude-sonnet-4-latest',
      'anthropic/claude-sonnet-4',
      'claude-sonnet-4/anthropic',
      'claude-sonnet-4-preview',
      'claude-sonnet-4-20240229-preview',
      'claude-opus-4',
      'claude-opus-4-20240229',
      'claude-opus-4-latest',
      'anthropic/claude-opus-4',
      'claude-opus-4/anthropic',
      'claude-opus-4-preview',
      'claude-opus-4-20240229-preview',
    ];

    modelVariations.forEach((model) => {
      const isSonnet = model.includes('sonnet');
      const expectedKey = isSonnet ? 'claude-sonnet-4' : 'claude-opus-4';

      expect(getCacheMultiplier({ model, cacheType: 'write' })).toBe(
        cacheTokenValues[expectedKey].write,
      );
      expect(getCacheMultiplier({ model, cacheType: 'read' })).toBe(
        cacheTokenValues[expectedKey].read,
      );
    });
  });
});

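// A relationship worth keeping in mind when updating these tables (not asserted in this spec):
// Anthropic generally prices cache writes above the base input rate and cache reads well below
// it, so for a given key one would expect read < prompt < write. Illustrative check:
//
//   const { prompt } = tokenValues['claude-sonnet-4'];
//   const { write, read } = cacheTokenValues['claude-sonnet-4'];
//   // expected ordering (assumption, not enforced here): read < prompt < write
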
describe('tokens.ts and tx.js sync validation', () => {
  it('should resolve all models in maxTokensMap to pricing via getValueKey', () => {
    const tokensKeys = Object.keys(maxTokensMap[EModelEndpoint.openAI]);
    const txKeys = Object.keys(tokenValues);

    const unresolved = [];

    tokensKeys.forEach((key) => {
      // Skip legacy token size mappings (e.g., '4k', '8k', '16k', '32k')
      if (/^\d+k$/.test(key)) return;

      // Skip generic pattern keys (end with '-' or ':')
      if (key.endsWith('-') || key.endsWith(':')) return;

      // Try to resolve via getValueKey
      const resolvedKey = getValueKey(key);

      // If it resolves and the resolved key has pricing, success
      if (resolvedKey && txKeys.includes(resolvedKey)) return;

      // If it resolves to a legacy key (4k, 8k, etc), also OK
      if (resolvedKey && /^\d+k$/.test(resolvedKey)) return;

      // If we get here, this model can't get pricing - flag it
      unresolved.push({
        key,
        resolvedKey: resolvedKey || 'undefined',
        context: maxTokensMap[EModelEndpoint.openAI][key],
      });
    });

    if (unresolved.length > 0) {
      console.log('\nModels that cannot resolve to pricing via getValueKey:');
      unresolved.forEach(({ key, resolvedKey, context }) => {
        console.log(`  - '${key}' → '${resolvedKey}' (context: ${context})`);
      });
    }

    expect(unresolved).toEqual([]);
  });

  it('should not have redundant dated variants with same pricing and context as base model', () => {
    const txKeys = Object.keys(tokenValues);
    const redundant = [];

    txKeys.forEach((key) => {
      // Check if this is a dated variant (ends with -YYYY-MM-DD)
      if (key.match(/.*-\d{4}-\d{2}-\d{2}$/)) {
        const baseKey = key.replace(/-\d{4}-\d{2}-\d{2}$/, '');

        if (txKeys.includes(baseKey)) {
          const variantPricing = tokenValues[key];
          const basePricing = tokenValues[baseKey];
          const variantContext = maxTokensMap[EModelEndpoint.openAI][key];
          const baseContext = maxTokensMap[EModelEndpoint.openAI][baseKey];

          const samePricing =
            variantPricing.prompt === basePricing.prompt &&
            variantPricing.completion === basePricing.completion;
          const sameContext = variantContext === baseContext;

          if (samePricing && sameContext) {
            redundant.push({
              key,
              baseKey,
              pricing: `${variantPricing.prompt}/${variantPricing.completion}`,
              context: variantContext,
            });
          }
        }
      }
    });

    if (redundant.length > 0) {
      console.log('\nRedundant dated variants found (same pricing and context as base):');
      redundant.forEach(({ key, baseKey, pricing, context }) => {
        console.log(`  - '${key}' → '${baseKey}' (pricing: ${pricing}, context: ${context})`);
        console.log(`    Can be removed - pattern matching will handle it`);
      });
    }

    expect(redundant).toEqual([]);
  });

  it('should have context windows in tokens.ts for all models with pricing in tx.js (openAI catch-all)', () => {
    const txKeys = Object.keys(tokenValues);
    const missingContext = [];

    txKeys.forEach((key) => {
      // Skip legacy token size mappings (4k, 8k, 16k, 32k)
      if (/^\d+k$/.test(key)) return;

      // Check if this model has a context window defined
      const context = maxTokensMap[EModelEndpoint.openAI][key];

      if (!context) {
        const pricing = tokenValues[key];
        missingContext.push({
          key,
          pricing: `${pricing.prompt}/${pricing.completion}`,
        });
      }
    });

    if (missingContext.length > 0) {
      console.log('\nModels with pricing but missing context in tokens.ts:');
      missingContext.forEach(({ key, pricing }) => {
        console.log(`  - '${key}' (pricing: ${pricing})`);
        console.log(`    Add to tokens.ts openAIModels/bedrockModels/etc.`);
      });
    }

    expect(missingContext).toEqual([]);
  });
});