🪨 feat: AWS Bedrock support (#3935)
* feat: Add BedrockIcon component to SVG library
* feat: EModelEndpoint.bedrock
* feat: first pass, bedrock chat. note: AgentClient is returning `agents` as conversation.endpoint
* fix: declare endpoint in initialization step
* chore: Update @librechat/agents dependency to version 1.4.5
* feat: backend content aggregation for agents/bedrock
* feat: abort agent requests
* feat: AWS Bedrock icons
* WIP: agent provider schema parsing
* chore: Update EditIcon props type
* refactor(useGenerationsByLatest): make agents and bedrock editable
* refactor: non-assistant message content, parts
* fix: Bedrock response `sender`
* fix: use endpointOption.model_parameters not endpointOption.modelOptions
* fix: types for step handler
* refactor: Update Agents.ToolCallDelta type
* refactor: Remove unnecessary assignment of parentMessageId in AskController
* refactor: remove unnecessary assignment of parentMessageId (agent request handler)
* fix(bedrock/agents): message regeneration
* refactor: dynamic form elements using react-hook-form Controllers
* fix: agent icons/labels for messages
* fix: agent actions
* fix: use of new dynamic tags causing application crash
* refactor: dynamic settings touch-ups
* refactor: update Slider component to allow custom track class name
* refactor: update DynamicSlider component styles
* refactor: use Constants value for GLOBAL_PROJECT_NAME (enum)
* feat: agent share global methods/controllers
* fix: agents query
* fix: `getResponseModel`
* fix: share prompt a11y issue
* refactor: update SharePrompt dialog theme styles
* refactor: explicit typing for SharePrompt
* feat: add agent roles/permissions
* chore: update @librechat/agents dependency to version 1.4.7 for tool_call_ids edge case
* fix(Anthropic): messages.X.content.Y.tool_use.input: Input should be a valid dictionary
* fix: handle text parts with tool_call_ids and empty text
* fix: role initialization
* refactor: don't make instructions required
* refactor: improve typing of Text part
* fix: setShowStopButton for agents route
* chore: remove params for now
* fix: add streamBuffer and streamRate to help prevent 'Overloaded' errors from Anthropic API
* refactor: remove console.log statement in ContentRender component
* chore: typing, rename Context to Delete Button
* chore(DeleteButton): logging
* refactor(Action): make accessible
* style(Action): improve a11y again
* refactor: remove use/mention of mongoose sessions
* feat: first pass, sharing agents
* feat: visual indicator for global agent, remove author when serving to non-author
* wip: params
* chore: fix typing issues
* fix(schemas): typing
* refactor: improve accessibility of ListCard component and fix console React warning
* wip: reset templates for non-legacy new convos
* Revert "wip: params"
This reverts commit f8067e91d4adf7be9e0d9e914aaae79ac4689b80.
* Revert "refactor: dynamic form elements using react-hook-form Controllers"
This reverts commit 2150c4815d8c74a978a4b697aa8f54dc11e035d7.
* fix(Parameters): types and parameter effect update to only update local state to parameters
* refactor: optimize useDebouncedInput hook for better performance
* feat: first pass, anthropic bedrock params
* chore: paramEndpoints check for endpointType too
* fix: maxTokens to use coerceNumber.optional(),
* feat: extra chat model params
* chore: reduce code repetition
* refactor: improve preset title handling in SaveAsPresetDialog component
* refactor: improve preset handling in HeaderOptions component
* chore: improve typing, replace legacy dialog for SaveAsPresetDialog
* feat: save as preset from parameters panel
* fix: multi-search in select dropdown when using Option type
* refactor: update default showDefault value to false in Dynamic components
* feat: Bedrock presets settings
* chore: config, fix agents schema, update config version
* refactor: update AWS region variable name in bedrock options endpoint to BEDROCK_AWS_DEFAULT_REGION
* refactor: update baseEndpointSchema in config.ts to include baseURL property
* refactor: update createRun function to include req parameter and set streamRate based on provider
* feat: availableRegions via config
* refactor: remove unused demo agent controller file
* WIP: title
* Update @librechat/agents to version 1.5.0
* chore: addTitle.js to handle empty responseText
* feat: support images and titles
* feat: context token updates
* Refactor BaseClient test to use expect.objectContaining
* refactor: add model select, remove header options params, move side panel params below prompts
* chore: update models list, catch title error
* feat: model service for bedrock models (env)
* chore: Remove verbose debug log in AgentClient class following stream
* feat(bedrock): track token spend; fix: token rates, value key mapping for AWS models
* refactor: handle streamRate in `handleLLMNewToken` callback
* chore: AWS Bedrock example config in `.env.example`
* refactor: Rename bedrockMeta to bedrockGeneral in settings.ts and use for AI21 and Amazon Bedrock providers
* refactor: Update `.env.example` with AWS Bedrock model IDs URL and additional notes
* feat: titleModel support for bedrock
* refactor: Update `.env.example` with additional notes for AWS Bedrock model IDs
2024-09-09 12:06:59 -04:00
|
|
|
const { EModelEndpoint } = require('librechat-data-provider');
|
2024-08-17 03:24:09 -04:00
|
|
|
const {
|
|
|
|
defaultRate,
|
|
|
|
tokenValues,
|
|
|
|
getValueKey,
|
|
|
|
getMultiplier,
|
2024-12-03 22:25:15 -05:00
|
|
|
cacheTokenValues,
|
2024-08-17 03:24:09 -04:00
|
|
|
getCacheMultiplier,
|
|
|
|
} = require('./tx');
|
feat: Accurate Token Usage Tracking & Optional Balance (#1018)
* refactor(Chains/llms): allow passing callbacks
* refactor(BaseClient): accurately count completion tokens as generation only
* refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM
* wip: summary prompt tokens
* refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end
wip: draft out relevant providers and variables for token tracing
* refactor(createLLM): make streaming prop false by default
* chore: remove use of getTokenCountForResponse
* refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace
* chore: remove passing of streaming prop, also console log useful vars for tracing
* feat: formatFromLangChain helper function to count tokens for ChatModelStart
* refactor(initializeLLM): add role for LLM tracing
* chore(formatFromLangChain): update JSDoc
* feat(formatMessages): formats langChain messages into OpenAI payload format
* chore: install openai-chat-tokens
* refactor(formatMessage): optimize conditional langChain logic
fix(formatFromLangChain): fix destructuring
* feat: accurate prompt tokens for ChatModelStart before generation
* refactor(handleChatModelStart): move to callbacks dir, use factory function
* refactor(initializeLLM): rename 'role' to 'context'
* feat(Balance/Transaction): new schema/models for tracking token spend
refactor(Key): factor out model export to separate file
* refactor(initializeClient): add req,res objects to client options
* feat: add-balance script to add to an existing user's token balance
refactor(Transaction): use multiplier map/function, return balance update
* refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match
* refactor(Tx): add fair fallback value multiplier in case the config result is undefined
* refactor(Balance): rename 'tokens' to 'tokenCredits'
* feat: balance check, add tx.js for new tx-related methods and tests
* chore(summaryPrompts): update prompt token count
* refactor(callbacks): pass req, res
wip: check balance
* refactor(Tx): make convoId a String type, fix(calculateTokenValue)
* refactor(BaseClient): add conversationId as client prop when assigned
* feat(RunManager): track LLM runs with manager, track token spend from LLM,
refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls
* feat(spendTokens): helper to spend prompt/completion tokens
* feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds
refactor(Balance): static check method to return object instead of boolean now
wip(OpenAIClient): implement use of checkBalance
* refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large
refactor(OpenAIClient): add checkBalance
refactor(createStartHandler): add checkBalance
* chore: remove prompt and completion token logging from route handler
* chore(spendTokens): add JSDoc
* feat(logTokenCost): record transactions for basic api calls
* chore(ask/edit): invoke getResponseSender only once per API call
* refactor(ask/edit): pass promptTokens to getIds and include in abort data
* refactor(getIds -> getReqData): rename function
* refactor(Tx): increase value if incomplete message
* feat: record tokenUsage when message is aborted
* refactor: subtract tokens when payload includes function_call
* refactor: add namespace for token_balance
* fix(spendTokens): only execute if corresponding token type amounts are defined
* refactor(checkBalance): throws Error if not enough token credits
* refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run'
* fix(abortMiddleware): circular dependency, and default to empty string for completionTokens
* fix: properly cancel title requests when there aren't enough tokens to generate
* feat(predictNewSummary): custom chain for summaries to allow signal passing
refactor(summaryBuffer): use new custom chain
* feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError
* refactor(createStartHandler): if summary, add error details to runs
* fix(OpenAIClient): support aborting from summarization & showing error to user
refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer
* refactor(logTokenCost -> recordTokenUsage): rename
* refactor(checkBalance): include promptTokens in errorMessage
* refactor(checkBalance/spendTokens): move to models dir
* fix(createLanguageChain): correctly pass config
* refactor(initializeLLM/title): add tokenBuffer of 150 for balance check
* refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called
* refactor(createStartHandler): add error to run if context is plugins as well
* refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run
* refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic
* chore: use absolute equality for addTitle condition
* refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional
* style: icon changes to match official
* fix(BaseClient): getTokenCountForResponse -> getTokenCount
* fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc
* refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled
* fix(e2e/cleanUp): cleanup new collections, import all model methods from index
* fix(config/add-balance): add uncaughtException listener
* fix: circular dependency
* refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance
* fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped
* fix(createStartHandler): correct condition for generations
* chore: bump postcss due to moderate severity vulnerability
* chore: bump zod due to low severity vulnerability
* chore: bump openai & data-provider version
* feat(types): OpenAI Message types
* chore: update bun lockfile
* refactor(CodeBlock): add error block formatting
* refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON
* chore(logViolation): delete user_id after error is logged
* refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex
* fix(DALL-E): use latest openai SDK
* chore: reorganize imports, fix type issue
* feat(server): add balance route
* fix(api/models): add auth
* feat(data-provider): /api/balance query
* feat: show balance if checking is enabled, refetch on final message or error
* chore: update docs, .env.example with token_usage info, add balance script command
* fix(Balance): fallback to empty obj for balance query
* style: slight adjustment of balance element
* docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
|
|
|
|
|
|
|
describe('getValueKey', () => {
|
|
|
|
it('should return "16k" for model name containing "gpt-3.5-turbo-16k"', () => {
  // A gpt-3.5-turbo-16k name carrying an arbitrary suffix should resolve to the '16k' key.
  const valueKey = getValueKey('gpt-3.5-turbo-16k-some-other-info');
  expect(valueKey).toBe('16k');
});
|
|
|
|
|
|
|
|
it('should return "4k" for model name containing "gpt-3.5"', () => {
  // A plain gpt-3.5 name carrying an arbitrary suffix should resolve to the '4k' key.
  const valueKey = getValueKey('gpt-3.5-some-other-info');
  expect(valueKey).toBe('4k');
});
|
|
|
|
|
|
|
|
it('should return "32k" for model name containing "gpt-4-32k"', () => {
  // A gpt-4-32k name carrying an arbitrary suffix should resolve to the '32k' key.
  const valueKey = getValueKey('gpt-4-32k-some-other-info');
  expect(valueKey).toBe('32k');
});
|
|
|
|
|
|
|
|
it('should return "8k" for model name containing "gpt-4"', () => {
  // A plain gpt-4 name carrying an arbitrary suffix should resolve to the '8k' key.
  const valueKey = getValueKey('gpt-4-some-other-info');
  expect(valueKey).toBe('8k');
});
|
|
|
|
|
2025-08-07 16:01:29 -04:00
|
|
|
// Checks that gpt-5 model names (arbitrary suffix, dated, provider-prefixed, "-turbo")
// are all expected to map to the 'gpt-5' value key.
it('should return "gpt-5" for model name containing "gpt-5"', () => {
|
|
|
|
expect(getValueKey('gpt-5-some-other-info')).toBe('gpt-5');
|
|
|
|
expect(getValueKey('gpt-5-2025-01-30')).toBe('gpt-5');
|
|
|
|
expect(getValueKey('gpt-5-2025-01-30-0130')).toBe('gpt-5');
|
|
|
|
expect(getValueKey('openai/gpt-5')).toBe('gpt-5');
|
|
|
|
expect(getValueKey('openai/gpt-5-2025-01-30')).toBe('gpt-5');
|
|
|
|
expect(getValueKey('gpt-5-turbo')).toBe('gpt-5');
|
|
|
|
expect(getValueKey('gpt-5-0130')).toBe('gpt-5');
|
feat: Accurate Token Usage Tracking & Optional Balance (#1018)
* refactor(Chains/llms): allow passing callbacks
* refactor(BaseClient): accurately count completion tokens as generation only
* refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM
* wip: summary prompt tokens
* refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end
wip: draft out relevant providers and variables for token tracing
* refactor(createLLM): make streaming prop false by default
* chore: remove use of getTokenCountForResponse
* refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace
* chore: remove passing of streaming prop, also console log useful vars for tracing
* feat: formatFromLangChain helper function to count tokens for ChatModelStart
* refactor(initializeLLM): add role for LLM tracing
* chore(formatFromLangChain): update JSDoc
* feat(formatMessages): formats langChain messages into OpenAI payload format
* chore: install openai-chat-tokens
* refactor(formatMessage): optimize conditional langChain logic
fix(formatFromLangChain): fix destructuring
* feat: accurate prompt tokens for ChatModelStart before generation
* refactor(handleChatModelStart): move to callbacks dir, use factory function
* refactor(initializeLLM): rename 'role' to 'context'
* feat(Balance/Transaction): new schema/models for tracking token spend
refactor(Key): factor out model export to separate file
* refactor(initializeClient): add req,res objects to client options
* feat: add-balance script to add to an existing users' token balance
refactor(Transaction): use multiplier map/function, return balance update
* refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match
* refactor(Tx): add fair fallback value multiplier incase the config result is undefined
* refactor(Balance): rename 'tokens' to 'tokenCredits'
* feat: balance check, add tx.js for new tx-related methods and tests
* chore(summaryPrompts): update prompt token count
* refactor(callbacks): pass req, res
wip: check balance
* refactor(Tx): make convoId a String type, fix(calculateTokenValue)
* refactor(BaseClient): add conversationId as client prop when assigned
* feat(RunManager): track LLM runs with manager, track token spend from LLM,
refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls
* feat(spendTokens): helper to spend prompt/completion tokens
* feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds
refactor(Balance): static check method to return object instead of boolean now
wip(OpenAIClient): implement use of checkBalance
* refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large
refactor(OpenAIClient): add checkBalance
refactor(createStartHandler): add checkBalance
* chore: remove prompt and completion token logging from route handler
* chore(spendTokens): add JSDoc
* feat(logTokenCost): record transactions for basic api calls
* chore(ask/edit): invoke getResponseSender only once per API call
* refactor(ask/edit): pass promptTokens to getIds and include in abort data
* refactor(getIds -> getReqData): rename function
* refactor(Tx): increase value if incomplete message
* feat: record tokenUsage when message is aborted
* refactor: subtract tokens when payload includes function_call
* refactor: add namespace for token_balance
* fix(spendTokens): only execute if corresponding token type amounts are defined
* refactor(checkBalance): throws Error if not enough token credits
* refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run'
* fix(abortMiddleware): circular dependency, and default to empty string for completionTokens
* fix: properly cancel title requests when there isn't enough tokens to generate
* feat(predictNewSummary): custom chain for summaries to allow signal passing
refactor(summaryBuffer): use new custom chain
* feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError
* refactor(createStartHandler): if summary, add error details to runs
* fix(OpenAIClient): support aborting from summarization & showing error to user
refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer
* refactor(logTokenCost -> recordTokenUsage): rename
* refactor(checkBalance): include promptTokens in errorMessage
* refactor(checkBalance/spendTokens): move to models dir
* fix(createLanguageChain): correctly pass config
* refactor(initializeLLM/title): add tokenBuffer of 150 for balance check
* refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called
* refactor(createStartHandler): add error to run if context is plugins as well
* refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run
* refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic
* chore: use absolute equality for addTitle condition
* refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional
* style: icon changes to match official
* fix(BaseClient): getTokenCountForResponse -> getTokenCount
* fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc
* refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled
* fix(e2e/cleanUp): cleanup new collections, import all model methods from index
* fix(config/add-balance): add uncaughtException listener
* fix: circular dependency
* refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance
* fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped
* fix(createStartHandler): correct condition for generations
* chore: bump postcss due to moderate severity vulnerability
* chore: bump zod due to low severity vulnerability
* chore: bump openai & data-provider version
* feat(types): OpenAI Message types
* chore: update bun lockfile
* refactor(CodeBlock): add error block formatting
* refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON
* chore(logViolation): delete user_id after error is logged
* refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex
* fix(DALL-E): use latest openai SDK
* chore: reorganize imports, fix type issue
* feat(server): add balance route
* fix(api/models): add auth
* feat(data-provider): /api/balance query
* feat: show balance if checking is enabled, refetch on final message or error
* chore: update docs, .env.example with token_usage info, add balance script command
* fix(Balance): fallback to empty obj for balance query
* style: slight adjustment of balance element
* docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
|
|
|
});
|
2023-11-06 15:26:16 -05:00
|
|
|
|
|
|
|
it('should return "gpt-3.5-turbo-1106" for model name containing "gpt-3.5-turbo-1106"', () => {
  // Covers a suffixed name, a provider-prefixed name, and a name followed by a path segment.
  const modelNames = [
    'gpt-3.5-turbo-1106-some-other-info',
    'openai/gpt-3.5-turbo-1106',
    'gpt-3.5-turbo-1106/openai',
  ];
  for (const modelName of modelNames) {
    expect(getValueKey(modelName)).toBe('gpt-3.5-turbo-1106');
  }
});
|
|
|
|
|
|
|
|
it('should return "gpt-4-1106" for model name containing "gpt-4-1106"', () => {
  // Covers suffixed, preview, provider-prefixed, and path-suffixed spellings of gpt-4-1106.
  const modelNames = [
    'gpt-4-1106-some-other-info',
    'gpt-4-1106-vision-preview',
    'gpt-4-1106-preview',
    'openai/gpt-4-1106',
    'gpt-4-1106/openai/',
  ];
  for (const modelName of modelNames) {
    expect(getValueKey(modelName)).toBe('gpt-4-1106');
  }
});
|
2024-04-23 08:57:20 -04:00
|
|
|
|
|
|
|
it('should return "gpt-4-1106" for model type of "gpt-4-1106"', () => {
  // Names that do not literally contain "1106" (vision-preview, turbo, 0125) are still
  // expected to share the 'gpt-4-1106' key.
  const modelNames = ['gpt-4-vision-preview', 'openai/gpt-4-1106', 'gpt-4-turbo', 'gpt-4-0125'];
  for (const modelName of modelNames) {
    expect(getValueKey(modelName)).toBe('gpt-4-1106');
  }
});
|
2024-05-13 14:25:02 -04:00
|
|
|
|
2025-02-28 12:19:21 -05:00
|
|
|
it('should return "gpt-4.5" for model type of "gpt-4.5"', () => {
  // Preview, dated, provider-prefixed, and suffixed gpt-4.5 names all expect the 'gpt-4.5' key.
  const modelNames = [
    'gpt-4.5-preview',
    'gpt-4.5-2024-08-06',
    'gpt-4.5-2024-08-06-0718',
    'openai/gpt-4.5',
    'openai/gpt-4.5-2024-08-06',
    'gpt-4.5-turbo',
    'gpt-4.5-0125',
  ];
  for (const modelName of modelNames) {
    expect(getValueKey(modelName)).toBe('gpt-4.5');
  }
});
|
|
|
|
|
2025-04-14 14:55:59 -04:00
|
|
|
it('should return "gpt-4.1" for model type of "gpt-4.1"', () => {
  // Preview, dated, provider-prefixed, and suffixed gpt-4.1 names all expect the 'gpt-4.1' key.
  const modelNames = [
    'gpt-4.1-preview',
    'gpt-4.1-2024-08-06',
    'gpt-4.1-2024-08-06-0718',
    'openai/gpt-4.1',
    'openai/gpt-4.1-2024-08-06',
    'gpt-4.1-turbo',
    'gpt-4.1-0125',
  ];
  for (const modelName of modelNames) {
    expect(getValueKey(modelName)).toBe('gpt-4.1');
  }
});
|
|
|
|
|
|
|
|
it('should return "gpt-4.1-mini" for model type of "gpt-4.1-mini"', () => {
  // The mini variant is expected to keep its own key rather than collapsing to 'gpt-4.1'.
  const modelNames = [
    'gpt-4.1-mini-preview',
    'gpt-4.1-mini-2024-08-06',
    'openai/gpt-4.1-mini',
    'gpt-4.1-mini-0125',
  ];
  for (const modelName of modelNames) {
    expect(getValueKey(modelName)).toBe('gpt-4.1-mini');
  }
});
|
|
|
|
|
|
|
|
it('should return "gpt-4.1-nano" for model type of "gpt-4.1-nano"', () => {
  // The nano variant is expected to keep its own key rather than collapsing to 'gpt-4.1'.
  const modelNames = [
    'gpt-4.1-nano-preview',
    'gpt-4.1-nano-2024-08-06',
    'openai/gpt-4.1-nano',
    'gpt-4.1-nano-0125',
  ];
  for (const modelName of modelNames) {
    expect(getValueKey(modelName)).toBe('gpt-4.1-nano');
  }
});
|
|
|
|
|
2025-08-07 16:01:29 -04:00
|
|
|
it('should return "gpt-5" for model type of "gpt-5"', () => {
  // Dated, provider-prefixed, and suffixed gpt-5 names all expect the 'gpt-5' key.
  const modelNames = [
    'gpt-5-2025-01-30',
    'gpt-5-2025-01-30-0130',
    'openai/gpt-5',
    'openai/gpt-5-2025-01-30',
    'gpt-5-turbo',
    'gpt-5-0130',
  ];
  for (const modelName of modelNames) {
    expect(getValueKey(modelName)).toBe('gpt-5');
  }
});
|
|
|
|
|
|
|
|
it('should return "gpt-5-mini" for model type of "gpt-5-mini"', () => {
  // The mini variant is expected to keep its own key rather than collapsing to 'gpt-5'.
  const modelNames = [
    'gpt-5-mini-2025-01-30',
    'openai/gpt-5-mini',
    'gpt-5-mini-0130',
    'gpt-5-mini-2025-01-30-0130',
  ];
  for (const modelName of modelNames) {
    expect(getValueKey(modelName)).toBe('gpt-5-mini');
  }
});
|
|
|
|
|
|
|
|
it('should return "gpt-5-nano" for model type of "gpt-5-nano"', () => {
  // The nano variant is expected to keep its own key rather than collapsing to 'gpt-5'.
  const modelNames = [
    'gpt-5-nano-2025-01-30',
    'openai/gpt-5-nano',
    'gpt-5-nano-0130',
    'gpt-5-nano-2025-01-30-0130',
  ];
  for (const modelName of modelNames) {
    expect(getValueKey(modelName)).toBe('gpt-5-nano');
  }
});
|
|
|
|
|
2024-05-13 14:25:02 -04:00
|
|
|
it('should return "gpt-4o" for model type of "gpt-4o"', () => {
  // Dated, provider-prefixed, and suffixed gpt-4o names all expect the 'gpt-4o' key.
  const modelNames = [
    'gpt-4o-2024-08-06',
    'gpt-4o-2024-08-06-0718',
    'openai/gpt-4o',
    'openai/gpt-4o-2024-08-06',
    'gpt-4o-turbo',
    'gpt-4o-0125',
  ];
  for (const modelName of modelNames) {
    expect(getValueKey(modelName)).toBe('gpt-4o');
  }
});
|
2024-06-20 20:48:15 -04:00
|
|
|
|
2024-07-19 13:59:07 +02:00
|
|
|
it('should return "gpt-4o-mini" for model type of "gpt-4o-mini"', () => {
  const miniModelNames = ['gpt-4o-mini-2024-07-18', 'openai/gpt-4o-mini', 'gpt-4o-mini-0718'];
  for (const modelName of miniModelNames) {
    expect(getValueKey(modelName)).toBe('gpt-4o-mini');
  }

  // Negative case: a dated non-mini gpt-4o name must not be reported as the mini key.
  const nonMiniKey = getValueKey('gpt-4o-2024-08-06-0718');
  expect(nonMiniKey).not.toBe('gpt-4o-mini');
});
|
|
|
|
|
2024-10-11 05:27:29 -07:00
|
|
|
it('should return "gpt-4o-2024-05-13" for model type of "gpt-4o-2024-05-13"', () => {
  const datedModelNames = [
    'gpt-4o-2024-05-13',
    'openai/gpt-4o-2024-05-13',
    'gpt-4o-2024-05-13-0718',
  ];
  for (const modelName of datedModelNames) {
    expect(getValueKey(modelName)).toBe('gpt-4o-2024-05-13');
  }

  // Negative case: the 2024-05-13 snapshot must not fall back to the generic 'gpt-4o' key.
  const snapshotKey = getValueKey('gpt-4o-2024-05-13-0718');
  expect(snapshotKey).not.toBe('gpt-4o');
});
|
|
|
|
|
2024-08-16 15:28:17 -04:00
|
|
|
it('should return "gpt-4o" for model type of "chatgpt-4o"', () => {
  // "chatgpt-4o" names are expected to share the 'gpt-4o' key.
  const modelNames = [
    'chatgpt-4o-latest',
    'openai/chatgpt-4o-latest',
    'chatgpt-4o-latest-0916',
    'chatgpt-4o-latest-0718',
  ];
  for (const modelName of modelNames) {
    expect(getValueKey(modelName)).toBe('gpt-4o');
  }
});
|
|
|
|
|
2025-02-24 20:08:55 -05:00
|
|
|
it('should return "claude-3-7-sonnet" for model type of "claude-3-7-sonnet-"', () => {
  // Hyphenated version spelling ("3-7") is expected to keep the hyphenated key.
  const modelNames = [
    'claude-3-7-sonnet-20240620',
    'anthropic/claude-3-7-sonnet',
    'claude-3-7-sonnet-turbo',
    'claude-3-7-sonnet-0125',
  ];
  for (const modelName of modelNames) {
    expect(getValueKey(modelName)).toBe('claude-3-7-sonnet');
  }
});
|
|
|
|
|
|
|
|
it('should return "claude-3.7-sonnet" for model type of "claude-3.7-sonnet-"', () => {
  // Dotted version spelling ("3.7") is expected to keep the dotted key.
  const modelNames = [
    'claude-3.7-sonnet-20240620',
    'anthropic/claude-3.7-sonnet',
    'claude-3.7-sonnet-turbo',
    'claude-3.7-sonnet-0125',
  ];
  for (const modelName of modelNames) {
    expect(getValueKey(modelName)).toBe('claude-3.7-sonnet');
  }
});
|
|
|
|
|
2024-06-20 20:48:15 -04:00
|
|
|
it('should return "claude-3-5-sonnet" for model type of "claude-3-5-sonnet-"', () => {
  // Hyphenated version spelling ("3-5") is expected to keep the hyphenated key.
  const modelNames = [
    'claude-3-5-sonnet-20240620',
    'anthropic/claude-3-5-sonnet',
    'claude-3-5-sonnet-turbo',
    'claude-3-5-sonnet-0125',
  ];
  for (const modelName of modelNames) {
    expect(getValueKey(modelName)).toBe('claude-3-5-sonnet');
  }
});
|
2024-08-27 09:07:04 -04:00
|
|
|
|
|
|
|
it('should return "claude-3.5-sonnet" for model type of "claude-3.5-sonnet-"', () => {
  // Dotted version spelling ("3.5") is expected to keep the dotted key.
  const modelNames = [
    'claude-3.5-sonnet-20240620',
    'anthropic/claude-3.5-sonnet',
    'claude-3.5-sonnet-turbo',
    'claude-3.5-sonnet-0125',
  ];
  for (const modelName of modelNames) {
    expect(getValueKey(modelName)).toBe('claude-3.5-sonnet');
  }
});
|
2024-11-04 15:10:24 -05:00
|
|
|
|
|
|
|
it('should return "claude-3-5-haiku" for model type of "claude-3-5-haiku-"', () => {
  // Hyphenated version spelling ("3-5") is expected to keep the hyphenated key.
  const modelNames = [
    'claude-3-5-haiku-20240620',
    'anthropic/claude-3-5-haiku',
    'claude-3-5-haiku-turbo',
    'claude-3-5-haiku-0125',
  ];
  for (const modelName of modelNames) {
    expect(getValueKey(modelName)).toBe('claude-3-5-haiku');
  }
});
|
|
|
|
|
|
|
|
it('should return "claude-3.5-haiku" for model type of "claude-3.5-haiku-"', () => {
  // Dotted version spelling ("3.5") is expected to keep the dotted key.
  const modelNames = [
    'claude-3.5-haiku-20240620',
    'anthropic/claude-3.5-haiku',
    'claude-3.5-haiku-turbo',
    'claude-3.5-haiku-0125',
  ];
  for (const modelName of modelNames) {
    expect(getValueKey(modelName)).toBe('claude-3.5-haiku');
  }
});
|
feat: Accurate Token Usage Tracking & Optional Balance (#1018)
* refactor(Chains/llms): allow passing callbacks
* refactor(BaseClient): accurately count completion tokens as generation only
* refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM
* wip: summary prompt tokens
* refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end
wip: draft out relevant providers and variables for token tracing
* refactor(createLLM): make streaming prop false by default
* chore: remove use of getTokenCountForResponse
* refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace
* chore: remove passing of streaming prop, also console log useful vars for tracing
* feat: formatFromLangChain helper function to count tokens for ChatModelStart
* refactor(initializeLLM): add role for LLM tracing
* chore(formatFromLangChain): update JSDoc
* feat(formatMessages): formats langChain messages into OpenAI payload format
* chore: install openai-chat-tokens
* refactor(formatMessage): optimize conditional langChain logic
fix(formatFromLangChain): fix destructuring
* feat: accurate prompt tokens for ChatModelStart before generation
* refactor(handleChatModelStart): move to callbacks dir, use factory function
* refactor(initializeLLM): rename 'role' to 'context'
* feat(Balance/Transaction): new schema/models for tracking token spend
refactor(Key): factor out model export to separate file
* refactor(initializeClient): add req,res objects to client options
* feat: add-balance script to add to an existing users' token balance
refactor(Transaction): use multiplier map/function, return balance update
* refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match
* refactor(Tx): add fair fallback value multiplier in case the config result is undefined
* refactor(Balance): rename 'tokens' to 'tokenCredits'
* feat: balance check, add tx.js for new tx-related methods and tests
* chore(summaryPrompts): update prompt token count
* refactor(callbacks): pass req, res
wip: check balance
* refactor(Tx): make convoId a String type, fix(calculateTokenValue)
* refactor(BaseClient): add conversationId as client prop when assigned
* feat(RunManager): track LLM runs with manager, track token spend from LLM,
refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls
* feat(spendTokens): helper to spend prompt/completion tokens
* feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds
refactor(Balance): static check method to return object instead of boolean now
wip(OpenAIClient): implement use of checkBalance
* refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large
refactor(OpenAIClient): add checkBalance
refactor(createStartHandler): add checkBalance
* chore: remove prompt and completion token logging from route handler
* chore(spendTokens): add JSDoc
* feat(logTokenCost): record transactions for basic api calls
* chore(ask/edit): invoke getResponseSender only once per API call
* refactor(ask/edit): pass promptTokens to getIds and include in abort data
* refactor(getIds -> getReqData): rename function
* refactor(Tx): increase value if incomplete message
* feat: record tokenUsage when message is aborted
* refactor: subtract tokens when payload includes function_call
* refactor: add namespace for token_balance
* fix(spendTokens): only execute if corresponding token type amounts are defined
* refactor(checkBalance): throws Error if not enough token credits
* refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run'
* fix(abortMiddleware): circular dependency, and default to empty string for completionTokens
* fix: properly cancel title requests when there aren't enough tokens to generate
* feat(predictNewSummary): custom chain for summaries to allow signal passing
refactor(summaryBuffer): use new custom chain
* feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError
* refactor(createStartHandler): if summary, add error details to runs
* fix(OpenAIClient): support aborting from summarization & showing error to user
refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer
* refactor(logTokenCost -> recordTokenUsage): rename
* refactor(checkBalance): include promptTokens in errorMessage
* refactor(checkBalance/spendTokens): move to models dir
* fix(createLanguageChain): correctly pass config
* refactor(initializeLLM/title): add tokenBuffer of 150 for balance check
* refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called
* refactor(createStartHandler): add error to run if context is plugins as well
* refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run
* refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic
* chore: use absolute equality for addTitle condition
* refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional
* style: icon changes to match official
* fix(BaseClient): getTokenCountForResponse -> getTokenCount
* fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc
* refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled
* fix(e2e/cleanUp): cleanup new collections, import all model methods from index
* fix(config/add-balance): add uncaughtException listener
* fix: circular dependency
* refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance
* fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped
* fix(createStartHandler): correct condition for generations
* chore: bump postcss due to moderate severity vulnerability
* chore: bump zod due to low severity vulnerability
* chore: bump openai & data-provider version
* feat(types): OpenAI Message types
* chore: update bun lockfile
* refactor(CodeBlock): add error block formatting
* refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON
* chore(logViolation): delete user_id after error is logged
* refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex
* fix(DALL-E): use latest openai SDK
* chore: reorganize imports, fix type issue
* feat(server): add balance route
* fix(api/models): add auth
* feat(data-provider): /api/balance query
* feat: show balance if checking is enabled, refetch on final message or error
* chore: update docs, .env.example with token_usage info, add balance script command
* fix(Balance): fallback to empty obj for balance query
* style: slight adjustment of balance element
* docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
|
|
|
});
|
|
|
|
|
|
|
|
describe('getMultiplier', () => {
|
|
|
|
// With an explicit valueKey, getMultiplier must return the matching
// tokenValues entry for both the prompt and the completion token types.
it('should return the correct multiplier for a given valueKey and tokenType', () => {
  const valueKey = '8k';

  for (const tokenType of ['prompt', 'completion']) {
    expect(getMultiplier({ valueKey, tokenType })).toBe(tokenValues[valueKey][tokenType]);
  }
});
|
|
|
|
|
2025-04-17 00:40:26 -04:00
|
|
|
// For each listed model name, both multipliers are looked up first and then
// compared against the tokenValues entry stored under that same name.
it('should return correct multipliers for o4-mini and o3', () => {
  for (const model of ['o4-mini', 'o3']) {
    const actual = {
      prompt: getMultiplier({ model, tokenType: 'prompt' }),
      completion: getMultiplier({ model, tokenType: 'completion' }),
    };

    expect(actual.prompt).toBe(tokenValues[model].prompt);
    expect(actual.completion).toBe(tokenValues[model].completion);
  }
});
|
|
|
|
|
2023-10-06 13:21:44 -04:00
|
|
|
// A known valueKey combined with a tokenType that has no entry must yield defaultRate.
it('should return defaultRate if tokenType is provided but not found in tokenValues', () => {
  const multiplier = getMultiplier({ valueKey: '8k', tokenType: 'unknownType' });

  expect(multiplier).toBe(defaultRate);
});
|
|
|
|
|
|
|
|
// When no valueKey is passed, the model name alone must lead to the '8k' prompt rate.
it('should derive the valueKey from the model if not provided', () => {
  const multiplier = getMultiplier({ tokenType: 'prompt', model: 'gpt-4-some-other-info' });

  expect(multiplier).toBe(tokenValues['8k'].prompt);
});
|
|
|
|
|
|
|
|
// Supplying only a tokenType, or only a model, must produce a multiplier of 1.
it('should return 1 if only model or tokenType is missing', () => {
  const incompleteParams = [{ tokenType: 'prompt' }, { model: 'gpt-4-some-other-info' }];

  for (const params of incompleteParams) {
    expect(getMultiplier(params)).toBe(1);
  }
});
|
|
|
|
|
2023-11-06 15:26:16 -05:00
|
|
|
// Prompt and completion multipliers for the 'gpt-3.5-turbo-1106' value key
// must equal the corresponding tokenValues entries.
it('should return the correct multiplier for gpt-3.5-turbo-1106', () => {
  const valueKey = 'gpt-3.5-turbo-1106';

  for (const tokenType of ['prompt', 'completion']) {
    expect(getMultiplier({ valueKey, tokenType })).toBe(tokenValues[valueKey][tokenType]);
  }
});
|
|
|
|
|
|
|
|
// Prompt and completion multipliers for the 'gpt-4-1106' value key
// must equal the corresponding tokenValues entries.
it('should return the correct multiplier for gpt-4-1106', () => {
  const valueKey = 'gpt-4-1106';
  const promptRate = getMultiplier({ valueKey, tokenType: 'prompt' });
  expect(promptRate).toBe(tokenValues[valueKey].prompt);

  const completionRate = getMultiplier({ valueKey, tokenType: 'completion' });
  expect(completionRate).toBe(tokenValues[valueKey].completion);
});
|
|
|
|
|
2025-08-07 16:01:29 -04:00
|
|
|
// gpt-5 rates must be returned both via a value key derived from a dated
// model name and via model names passed directly (suffixed or vendor-prefixed).
it('should return the correct multiplier for gpt-5', () => {
  const valueKey = getValueKey('gpt-5-2025-01-30');
  const lookups = [
    { valueKey, tokenType: 'prompt' },
    { valueKey, tokenType: 'completion' },
    { model: 'gpt-5-preview', tokenType: 'prompt' },
    { model: 'openai/gpt-5', tokenType: 'completion' },
  ];

  for (const lookup of lookups) {
    expect(getMultiplier(lookup)).toBe(tokenValues['gpt-5'][lookup.tokenType]);
  }
});
|
|
|
|
|
|
|
|
// gpt-5-mini rates must be returned both via a value key derived from a dated
// model name and via model names passed directly (suffixed or vendor-prefixed).
it('should return the correct multiplier for gpt-5-mini', () => {
  const valueKey = getValueKey('gpt-5-mini-2025-01-30');
  const lookups = [
    { valueKey, tokenType: 'prompt' },
    { valueKey, tokenType: 'completion' },
    { model: 'gpt-5-mini-preview', tokenType: 'prompt' },
    { model: 'openai/gpt-5-mini', tokenType: 'completion' },
  ];

  for (const lookup of lookups) {
    expect(getMultiplier(lookup)).toBe(tokenValues['gpt-5-mini'][lookup.tokenType]);
  }
});
|
|
|
|
|
|
|
|
// gpt-5-nano rates must be returned both via a value key derived from a dated
// model name and via model names passed directly (suffixed or vendor-prefixed).
it('should return the correct multiplier for gpt-5-nano', () => {
  const valueKey = getValueKey('gpt-5-nano-2025-01-30');
  const lookups = [
    { valueKey, tokenType: 'prompt' },
    { valueKey, tokenType: 'completion' },
    { model: 'gpt-5-nano-preview', tokenType: 'prompt' },
    { model: 'openai/gpt-5-nano', tokenType: 'completion' },
  ];

  for (const lookup of lookups) {
    expect(getMultiplier(lookup)).toBe(tokenValues['gpt-5-nano'][lookup.tokenType]);
  }
});
|
|
|
|
|
2024-05-13 14:25:02 -04:00
|
|
|
// The value key derived from a dated gpt-4o model name must give the gpt-4o
// rates, and its completion rate must differ from the gpt-4-1106 one.
it('should return the correct multiplier for gpt-4o', () => {
  const valueKey = getValueKey('gpt-4o-2024-08-06');
  const rateFor = (tokenType) => getMultiplier({ valueKey, tokenType });

  expect(rateFor('prompt')).toBe(tokenValues['gpt-4o'].prompt);
  expect(rateFor('completion')).toBe(tokenValues['gpt-4o'].completion);
  // The completion rate must not be the gpt-4-1106 completion rate.
  expect(rateFor('completion')).not.toBe(tokenValues['gpt-4-1106'].completion);
});
|
|
|
|
|
2025-04-14 14:55:59 -04:00
|
|
|
it('should return the correct multiplier for gpt-4.1', () => {
  // Resolve the dated model name once; all four lookups are checked against the gpt-4.1 rates.
  const datedKey = getValueKey('gpt-4.1-2024-08-06');
  const { prompt: promptRate, completion: completionRate } = tokenValues['gpt-4.1'];

  const cases = [
    [{ valueKey: datedKey, tokenType: 'prompt' }, promptRate],
    [{ valueKey: datedKey, tokenType: 'completion' }, completionRate],
    [{ model: 'gpt-4.1-preview', tokenType: 'prompt' }, promptRate],
    [{ model: 'openai/gpt-4.1', tokenType: 'completion' }, completionRate],
  ];

  for (const [args, expectedRate] of cases) {
    expect(getMultiplier(args)).toBe(expectedRate);
  }
});
|
|
|
|
|
|
|
|
it('should return the correct multiplier for gpt-4.1-mini', () => {
  const datedKey = getValueKey('gpt-4.1-mini-2024-08-06');
  const { prompt: promptRate, completion: completionRate } = tokenValues['gpt-4.1-mini'];

  // Lookup through the value key resolved from the dated model name.
  const keyedPrompt = getMultiplier({ valueKey: datedKey, tokenType: 'prompt' });
  expect(keyedPrompt).toBe(promptRate);
  const keyedCompletion = getMultiplier({ valueKey: datedKey, tokenType: 'completion' });
  expect(keyedCompletion).toBe(completionRate);

  // Lookup directly by model name, without passing a value key.
  const previewPrompt = getMultiplier({ model: 'gpt-4.1-mini-preview', tokenType: 'prompt' });
  expect(previewPrompt).toBe(promptRate);
  const prefixedCompletion = getMultiplier({
    model: 'openai/gpt-4.1-mini',
    tokenType: 'completion',
  });
  expect(prefixedCompletion).toBe(completionRate);
});
|
|
|
|
|
|
|
|
it('should return the correct multiplier for gpt-4.1-nano', () => {
  // Value key from the dated model name; expectations come from the gpt-4.1-nano entry.
  const datedKey = getValueKey('gpt-4.1-nano-2024-08-06');
  const { prompt: promptRate, completion: completionRate } = tokenValues['gpt-4.1-nano'];

  const cases = [
    [{ valueKey: datedKey, tokenType: 'prompt' }, promptRate],
    [{ valueKey: datedKey, tokenType: 'completion' }, completionRate],
    [{ model: 'gpt-4.1-nano-preview', tokenType: 'prompt' }, promptRate],
    [{ model: 'openai/gpt-4.1-nano', tokenType: 'completion' }, completionRate],
  ];

  cases.forEach(([args, expectedRate]) => {
    expect(getMultiplier(args)).toBe(expectedRate);
  });
});
|
|
|
|
|
2024-07-19 13:59:07 +02:00
|
|
|
it('should return the correct multiplier for gpt-4o-mini', () => {
  const resolvedKey = getValueKey('gpt-4o-mini-2024-07-18');

  const promptMultiplier = getMultiplier({ valueKey: resolvedKey, tokenType: 'prompt' });
  expect(promptMultiplier).toBe(tokenValues['gpt-4o-mini'].prompt);

  const completionMultiplier = getMultiplier({ valueKey: resolvedKey, tokenType: 'completion' });
  expect(completionMultiplier).toBe(tokenValues['gpt-4o-mini'].completion);

  // Queried again: the completion multiplier must differ from the gpt-4-1106 completion rate.
  const completionAgain = getMultiplier({ valueKey: resolvedKey, tokenType: 'completion' });
  expect(completionAgain).not.toBe(tokenValues['gpt-4-1106'].completion);
});
|
|
|
|
|
2024-08-16 15:28:17 -04:00
|
|
|
it('should return the correct multiplier for chatgpt-4o-latest', () => {
  const resolvedKey = getValueKey('chatgpt-4o-latest');

  // The multipliers are asserted against the gpt-4o entry of tokenValues.
  const promptMultiplier = getMultiplier({ valueKey: resolvedKey, tokenType: 'prompt' });
  expect(promptMultiplier).toBe(tokenValues['gpt-4o'].prompt);

  const completionMultiplier = getMultiplier({ valueKey: resolvedKey, tokenType: 'completion' });
  expect(completionMultiplier).toBe(tokenValues['gpt-4o'].completion);

  // Queried again: the completion multiplier must differ from the gpt-4o-mini completion rate.
  const completionAgain = getMultiplier({ valueKey: resolvedKey, tokenType: 'completion' });
  expect(completionAgain).not.toBe(tokenValues['gpt-4o-mini'].completion);
});
|
|
|
|
|
2023-11-06 15:26:16 -05:00
|
|
|
it('should derive the valueKey from the model if not provided for new models', () => {
  // Only `model` is passed (no valueKey); each row names the tokenValues entry
  // and token type the resulting multiplier is compared against.
  const cases = [
    ['prompt', 'gpt-3.5-turbo-1106-some-other-info', 'gpt-3.5-turbo-1106'],
    ['completion', 'gpt-4-1106-vision-preview', 'gpt-4-1106'],
    ['completion', 'gpt-4-0125-preview', 'gpt-4-1106'],
    ['completion', 'gpt-4-turbo-vision-preview', 'gpt-4-1106'],
    ['completion', 'gpt-3.5-turbo-0125', 'gpt-3.5-turbo-0125'],
  ];

  for (const [tokenType, model, expectedKey] of cases) {
    expect(getMultiplier({ tokenType, model })).toBe(tokenValues[expectedKey][tokenType]);
  }
});
|
|
|
|
|
2023-10-06 13:21:44 -04:00
|
|
|
it('should return defaultRate if derived valueKey does not match any known patterns', () => {
  // The model name used here is expected to resolve to the defaultRate fallback.
  const unknownModel = 'gpt-10-some-other-info';
  const multiplier = getMultiplier({ tokenType: 'prompt', model: unknownModel });
  expect(multiplier).toBe(defaultRate);
});
|
2025-08-07 15:03:19 -04:00
|
|
|
|
|
|
|
it('should return correct multipliers for GPT-OSS models', () => {
  // Each name is used both as an explicit valueKey and as a raw model name.
  for (const key of ['gpt-oss-20b', 'gpt-oss-120b']) {
    const { prompt: expectedPrompt, completion: expectedCompletion } = tokenValues[key];

    const lookups = [
      [{ valueKey: key, tokenType: 'prompt' }, expectedPrompt],
      [{ valueKey: key, tokenType: 'completion' }, expectedCompletion],
      [{ model: key, tokenType: 'prompt' }, expectedPrompt],
      [{ model: key, tokenType: 'completion' }, expectedCompletion],
    ];

    for (const [args, expectedRate] of lookups) {
      expect(getMultiplier(args)).toBe(expectedRate);
    }
  }
});
|
feat: Accurate Token Usage Tracking & Optional Balance (#1018)
* refactor(Chains/llms): allow passing callbacks
* refactor(BaseClient): accurately count completion tokens as generation only
* refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM
* wip: summary prompt tokens
* refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end
wip: draft out relevant providers and variables for token tracing
* refactor(createLLM): make streaming prop false by default
* chore: remove use of getTokenCountForResponse
* refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace
* chore: remove passing of streaming prop, also console log useful vars for tracing
* feat: formatFromLangChain helper function to count tokens for ChatModelStart
* refactor(initializeLLM): add role for LLM tracing
* chore(formatFromLangChain): update JSDoc
* feat(formatMessages): formats langChain messages into OpenAI payload format
* chore: install openai-chat-tokens
* refactor(formatMessage): optimize conditional langChain logic
fix(formatFromLangChain): fix destructuring
* feat: accurate prompt tokens for ChatModelStart before generation
* refactor(handleChatModelStart): move to callbacks dir, use factory function
* refactor(initializeLLM): rename 'role' to 'context'
* feat(Balance/Transaction): new schema/models for tracking token spend
refactor(Key): factor out model export to separate file
* refactor(initializeClient): add req,res objects to client options
* feat: add-balance script to add to an existing users' token balance
refactor(Transaction): use multiplier map/function, return balance update
* refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match
* refactor(Tx): add fair fallback value multiplier in case the config result is undefined
* refactor(Balance): rename 'tokens' to 'tokenCredits'
* feat: balance check, add tx.js for new tx-related methods and tests
* chore(summaryPrompts): update prompt token count
* refactor(callbacks): pass req, res
wip: check balance
* refactor(Tx): make convoId a String type, fix(calculateTokenValue)
* refactor(BaseClient): add conversationId as client prop when assigned
* feat(RunManager): track LLM runs with manager, track token spend from LLM,
refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls
* feat(spendTokens): helper to spend prompt/completion tokens
* feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds
refactor(Balance): static check method to return object instead of boolean now
wip(OpenAIClient): implement use of checkBalance
* refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large
refactor(OpenAIClient): add checkBalance
refactor(createStartHandler): add checkBalance
* chore: remove prompt and completion token logging from route handler
* chore(spendTokens): add JSDoc
* feat(logTokenCost): record transactions for basic api calls
* chore(ask/edit): invoke getResponseSender only once per API call
* refactor(ask/edit): pass promptTokens to getIds and include in abort data
* refactor(getIds -> getReqData): rename function
* refactor(Tx): increase value if incomplete message
* feat: record tokenUsage when message is aborted
* refactor: subtract tokens when payload includes function_call
* refactor: add namespace for token_balance
* fix(spendTokens): only execute if corresponding token type amounts are defined
* refactor(checkBalance): throws Error if not enough token credits
* refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run'
* fix(abortMiddleware): circular dependency, and default to empty string for completionTokens
* fix: properly cancel title requests when there aren't enough tokens to generate
* feat(predictNewSummary): custom chain for summaries to allow signal passing
refactor(summaryBuffer): use new custom chain
* feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError
* refactor(createStartHandler): if summary, add error details to runs
* fix(OpenAIClient): support aborting from summarization & showing error to user
refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer
* refactor(logTokenCost -> recordTokenUsage): rename
* refactor(checkBalance): include promptTokens in errorMessage
* refactor(checkBalance/spendTokens): move to models dir
* fix(createLanguageChain): correctly pass config
* refactor(initializeLLM/title): add tokenBuffer of 150 for balance check
* refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called
* refactor(createStartHandler): add error to run if context is plugins as well
* refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run
* refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic
* chore: use absolute equality for addTitle condition
* refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional
* style: icon changes to match official
* fix(BaseClient): getTokenCountForResponse -> getTokenCount
* fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc
* refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled
* fix(e2e/cleanUp): cleanup new collections, import all model methods from index
* fix(config/add-balance): add uncaughtException listener
* fix: circular dependency
* refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance
* fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped
* fix(createStartHandler): correct condition for generations
* chore: bump postcss due to moderate severity vulnerability
* chore: bump zod due to low severity vulnerability
* chore: bump openai & data-provider version
* feat(types): OpenAI Message types
* chore: update bun lockfile
* refactor(CodeBlock): add error block formatting
* refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON
* chore(logViolation): delete user_id after error is logged
* refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex
* fix(DALL-E): use latest openai SDK
* chore: reorganize imports, fix type issue
* feat(server): add balance route
* fix(api/models): add auth
* feat(data-provider): /api/balance query
* feat: show balance if checking is enabled, refetch on final message or error
* chore: update docs, .env.example with token_usage info, add balance script command
* fix(Balance): fallback to empty obj for balance query
* style: slight adjustment of balance element
* docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
|
|
|
});
|
2024-08-08 23:31:07 -04:00
|
|
|
|
|
|
|
describe('AWS Bedrock Model Tests', () => {
  // Bedrock model ids whose prompt and completion rates are verified below.
  const awsModels = [
    'anthropic.claude-3-5-haiku-20241022-v1:0',
    'anthropic.claude-3-haiku-20240307-v1:0',
    'anthropic.claude-3-sonnet-20240229-v1:0',
    'anthropic.claude-3-opus-20240229-v1:0',
    'anthropic.claude-3-5-sonnet-20240620-v1:0',
    'anthropic.claude-v2:1',
    'anthropic.claude-instant-v1',
    'meta.llama2-13b-chat-v1',
    'meta.llama2-70b-chat-v1',
    'meta.llama3-8b-instruct-v1:0',
    'meta.llama3-70b-instruct-v1:0',
    'meta.llama3-1-8b-instruct-v1:0',
    'meta.llama3-1-70b-instruct-v1:0',
    'meta.llama3-1-405b-instruct-v1:0',
    'mistral.mistral-7b-instruct-v0:2',
    'mistral.mistral-small-2402-v1:0',
    'mistral.mixtral-8x7b-instruct-v0:1',
    'mistral.mistral-large-2402-v1:0',
    'mistral.mistral-large-2407-v1:0',
    'cohere.command-text-v14',
    'cohere.command-light-text-v14',
    'cohere.command-r-v1:0',
    'cohere.command-r-plus-v1:0',
    'ai21.j2-mid-v1',
    'ai21.j2-ultra-v1',
    'amazon.titan-text-lite-v1',
    'amazon.titan-text-express-v1',
    'amazon.nova-micro-v1:0',
    'amazon.nova-lite-v1:0',
    'amazon.nova-pro-v1:0',
  ];

  /**
   * Lists the Bedrock model ids whose multiplier for `tokenType` is wrong.
   *
   * A model is reported when its resolved value key has no `tokenValues` entry,
   * when the entry's rate for `tokenType` is falsy, or when `getMultiplier`
   * returns anything other than that rate. Returning the ids (instead of a
   * single boolean) makes a failing assertion name the offending models.
   *
   * @param {'prompt' | 'completion'} tokenType - Rate to verify.
   * @returns {string[]} Model ids that failed the check; empty when all pass.
   */
  const findMismatchedModels = (tokenType) =>
    awsModels.filter((model) => {
      const valueKey = getValueKey(model, EModelEndpoint.bedrock);
      const rates = tokenValues[valueKey];
      if (!rates || !rates[tokenType]) {
        return true;
      }
      return getMultiplier({ valueKey, tokenType }) !== rates[tokenType];
    });

  it('should return the correct prompt multipliers for all models', () => {
    expect(findMismatchedModels('prompt')).toEqual([]);
  });

  it('should return the correct completion multipliers for all models', () => {
    expect(findMismatchedModels('completion')).toEqual([]);
  });
});
|
2024-08-17 03:24:09 -04:00
|
|
|
|
2025-01-22 07:50:09 -05:00
|
|
|
/**
 * Pricing checks for DeepSeek model identifiers: every name is resolved with `getValueKey` and
 * the rate returned by `getMultiplier` is compared with the matching `tokenValues` entry.
 */
describe('Deepseek Model Tests', () => {
  const deepseekModels = ['deepseek-chat', 'deepseek-coder', 'deepseek-reasoner', 'deepseek.r1'];

  /**
   * Asserts that the multiplier for `valueKey` equals the rate stored in `tokenValues`.
   *
   * Assertions are made per key rather than folded into a single boolean, so a failing run
   * shows the expected and received rates instead of a bare `true`/`false` mismatch.
   *
   * @param {string} valueKey - Key into `tokenValues`, as produced by `getValueKey`.
   * @param {'prompt' | 'completion'} tokenType - Which rate of the entry to check.
   */
  const expectConfiguredRate = (valueKey, tokenType) => {
    const multiplier = getMultiplier({ valueKey, tokenType });
    const configuredRate = tokenValues[valueKey][tokenType];
    // A missing or zero rate has to fail the test, exactly as the previous `rate && ...` fold did.
    expect(configuredRate).toBeTruthy();
    expect(multiplier).toBe(configuredRate);
  };

  it('should return the correct prompt multipliers for all models', () => {
    deepseekModels.forEach((model) => {
      expectConfiguredRate(getValueKey(model), 'prompt');
    });
  });

  it('should return the correct completion multipliers for all models', () => {
    deepseekModels.forEach((model) => {
      expectConfiguredRate(getValueKey(model), 'completion');
    });
  });

  it('should return the correct prompt multipliers for reasoning model', () => {
    const model = 'deepseek-reasoner';
    const valueKey = getValueKey(model);
    // The reasoner model is expected to resolve to a key identical to its own name.
    expect(valueKey).toBe(model);
    expectConfiguredRate(valueKey, 'prompt');
  });
});
|
|
|
|
|
2024-08-17 03:24:09 -04:00
|
|
|
/**
 * Exercises `getCacheMultiplier` with explicit value keys, with model names from which the key
 * has to be derived, with missing or unknown inputs (expected to yield `null`), and with a
 * caller-supplied `endpointTokenConfig`.
 */
describe('getCacheMultiplier', () => {
  it('should return the correct cache multiplier for a given valueKey and cacheType', () => {
    const valueKeys = ['claude-3-5-sonnet', 'claude-3-5-haiku', 'claude-3-haiku'];
    const cacheTypes = ['write', 'read'];

    for (const valueKey of valueKeys) {
      for (const cacheType of cacheTypes) {
        expect(getCacheMultiplier({ valueKey, cacheType })).toBe(
          cacheTokenValues[valueKey][cacheType],
        );
      }
    }
  });

  it('should return null if cacheType is provided but not found in cacheTokenValues', () => {
    const multiplier = getCacheMultiplier({
      valueKey: 'claude-3-5-sonnet',
      cacheType: 'unknownType',
    });
    expect(multiplier).toBeNull();
  });

  it('should derive the valueKey from the model if not provided', () => {
    const writeRate = getCacheMultiplier({ cacheType: 'write', model: 'claude-3-5-sonnet-20240620' });
    expect(writeRate).toBe(cacheTokenValues['claude-3-5-sonnet'].write);

    const readRate = getCacheMultiplier({ cacheType: 'read', model: 'claude-3-haiku-20240307' });
    expect(readRate).toBe(cacheTokenValues['claude-3-haiku'].read);
  });

  it('should return null if only model or cacheType is missing', () => {
    const withoutModel = getCacheMultiplier({ cacheType: 'write' });
    expect(withoutModel).toBeNull();

    const withoutCacheType = getCacheMultiplier({ model: 'claude-3-5-sonnet' });
    expect(withoutCacheType).toBeNull();
  });

  it('should return null if derived valueKey does not match any known patterns', () => {
    const multiplier = getCacheMultiplier({ cacheType: 'write', model: 'gpt-4-some-other-info' });
    expect(multiplier).toBeNull();
  });

  it('should handle endpointTokenConfig if provided', () => {
    const endpointTokenConfig = {
      'custom-model': { write: 5, read: 1 },
    };
    const model = 'custom-model';

    for (const cacheType of ['write', 'read']) {
      expect(getCacheMultiplier({ model, cacheType, endpointTokenConfig })).toBe(
        endpointTokenConfig[model][cacheType],
      );
    }
  });

  it('should return null if model is not found in endpointTokenConfig', () => {
    const endpointTokenConfig = {
      'custom-model': { write: 5, read: 1 },
    };
    const multiplier = getCacheMultiplier({
      model: 'unknown-model',
      cacheType: 'write',
      endpointTokenConfig,
    });
    expect(multiplier).toBeNull();
  });

  it('should handle models with "bedrock/" prefix', () => {
    const sonnetWrite = getCacheMultiplier({
      model: 'bedrock/anthropic.claude-3-5-sonnet-20240620-v1:0',
      cacheType: 'write',
    });
    expect(sonnetWrite).toBe(cacheTokenValues['claude-3-5-sonnet'].write);

    const haikuRead = getCacheMultiplier({
      model: 'bedrock/anthropic.claude-3-haiku-20240307-v1:0',
      cacheType: 'read',
    });
    expect(haikuRead).toBe(cacheTokenValues['claude-3-haiku'].read);
  });
});
|
2025-02-06 18:13:18 -05:00
|
|
|
|
|
|
|
/**
 * Checks that Google model names resolve to the expected `tokenValues` keys and that
 * `getMultiplier` returns the prompt/completion rates stored under those keys.
 */
describe('Google Model Tests', () => {
  const endpoint = EModelEndpoint.google;

  const googleModels = [
    'gemini-2.5-pro-preview-05-06',
    'gemini-2.5-flash-preview-04-17',
    'gemini-2.5-exp',
    'gemini-2.0-flash-lite-preview-02-05',
    'gemini-2.0-flash-001',
    'gemini-2.0-flash-exp',
    'gemini-2.0-pro-exp-02-05',
    'gemini-1.5-flash-8b',
    'gemini-1.5-flash-thinking',
    'gemini-1.5-pro-latest',
    'gemini-1.5-pro-preview-0409',
    'gemini-pro-vision',
    'gemini-1.0',
    'gemini-pro',
  ];

  it('should return the correct prompt and completion rates for all models', () => {
    // Phase 1: gather the resolved key and both rates for every model.
    const collected = [];
    for (const model of googleModels) {
      const valueKey = getValueKey(model, endpoint);
      const promptRate = getMultiplier({ model, tokenType: 'prompt', endpoint });
      const completionRate = getMultiplier({ model, tokenType: 'completion', endpoint });
      collected.push({ model, valueKey, promptRate, completionRate });
    }

    // Phase 2: compare each gathered rate with the table entry for its key.
    for (const { valueKey, promptRate, completionRate } of collected) {
      const rates = tokenValues[valueKey];
      expect(promptRate).toBe(rates.prompt);
      expect(completionRate).toBe(rates.completion);
    }
  });

  it('should map to the correct model keys', () => {
    const expected = {
      'gemini-2.5-pro-preview-05-06': 'gemini-2.5-pro',
      'gemini-2.5-flash-preview-04-17': 'gemini-2.5-flash',
      'gemini-2.5-exp': 'gemini-2.5',
      'gemini-2.0-flash-lite-preview-02-05': 'gemini-2.0-flash-lite',
      'gemini-2.0-flash-001': 'gemini-2.0-flash',
      'gemini-2.0-flash-exp': 'gemini-2.0-flash',
      'gemini-2.0-pro-exp-02-05': 'gemini-2.0',
      'gemini-1.5-flash-8b': 'gemini-1.5-flash-8b',
      'gemini-1.5-flash-thinking': 'gemini-1.5-flash',
      'gemini-1.5-pro-latest': 'gemini-1.5',
      'gemini-1.5-pro-preview-0409': 'gemini-1.5',
      'gemini-pro-vision': 'gemini-pro-vision',
      'gemini-1.0': 'gemini',
      'gemini-pro': 'gemini',
    };

    for (const [model, expectedKey] of Object.entries(expected)) {
      expect(getValueKey(model, endpoint)).toBe(expectedKey);
    }
  });

  it('should handle model names with different formats', () => {
    // Provider segment before or after the model name should not change the resolved key.
    const testCases = [
      { input: 'google/gemini-pro', expected: 'gemini' },
      { input: 'gemini-pro/google', expected: 'gemini' },
      { input: 'google/gemini-2.0-flash-lite', expected: 'gemini-2.0-flash-lite' },
    ];

    for (const { input, expected } of testCases) {
      const resolvedKey = getValueKey(input, endpoint);
      expect(resolvedKey).toBe(expected);

      const promptRate = getMultiplier({ model: input, tokenType: 'prompt', endpoint });
      expect(promptRate).toBe(tokenValues[expected].prompt);

      const completionRate = getMultiplier({ model: input, tokenType: 'completion', endpoint });
      expect(completionRate).toBe(tokenValues[expected].completion);
    }
  });
});
|
2025-02-24 20:08:55 -05:00
|
|
|
|
|
|
|
/**
 * Pricing checks for Grok model names: `getMultiplier` must return the prompt and completion
 * rates stored in `tokenValues` under the expected key, with and without an `xai/` prefix.
 */
describe('Grok Model Tests - Pricing', () => {
  describe('getMultiplier', () => {
    /**
     * Asserts both rates of `model` against the `tokenValues` entry stored under `rateKey`.
     * The prompt rate is checked first, then the completion rate.
     */
    const expectRates = (model, rateKey) => {
      expect(getMultiplier({ model, tokenType: 'prompt' })).toBe(tokenValues[rateKey].prompt);
      expect(getMultiplier({ model, tokenType: 'completion' })).toBe(
        tokenValues[rateKey].completion,
      );
    };

    /** Runs `expectRates` for every `[model, rateKey]` pair, in the order given. */
    const expectRatesForPairs = (pairs) => {
      for (const [model, rateKey] of pairs) {
        expectRates(model, rateKey);
      }
    };

    test('should return correct prompt and completion rates for Grok vision models', () => {
      const models = ['grok-2-vision-1212', 'grok-2-vision', 'grok-2-vision-latest'];
      for (const model of models) {
        expectRates(model, 'grok-2-vision');
      }
    });

    test('should return correct prompt and completion rates for Grok text models', () => {
      const models = ['grok-2-1212', 'grok-2', 'grok-2-latest'];
      for (const model of models) {
        expectRates(model, 'grok-2');
      }
    });

    test('should return correct prompt and completion rates for Grok beta models', () => {
      expectRatesForPairs([
        ['grok-vision-beta', 'grok-vision-beta'],
        ['grok-beta', 'grok-beta'],
      ]);
    });

    test('should return correct prompt and completion rates for Grok 3 models', () => {
      expectRatesForPairs([
        ['grok-3', 'grok-3'],
        ['grok-3-fast', 'grok-3-fast'],
        ['grok-3-mini', 'grok-3-mini'],
        ['grok-3-mini-fast', 'grok-3-mini-fast'],
      ]);
    });

    test('should return correct prompt and completion rates for Grok 4 model', () => {
      expectRates('grok-4-0709', 'grok-4');
    });

    test('should return correct prompt and completion rates for Grok 3 models with prefixes', () => {
      expectRatesForPairs([
        ['xai/grok-3', 'grok-3'],
        ['xai/grok-3-fast', 'grok-3-fast'],
        ['xai/grok-3-mini', 'grok-3-mini'],
        ['xai/grok-3-mini-fast', 'grok-3-mini-fast'],
      ]);
    });

    test('should return correct prompt and completion rates for Grok 4 model with prefixes', () => {
      expectRates('xai/grok-4-0709', 'grok-4');
    });
  });
});
|
2025-05-22 15:00:44 -04:00
|
|
|
|
|
|
|
/**
 * Pricing checks for the Claude 4 family: token rates via `getMultiplier`, cache rates via
 * `getCacheMultiplier`, and key resolution for names carrying extra prefixes or suffixes.
 */
describe('Claude Model Tests', () => {
  const SONNET_KEY = 'claude-sonnet-4';
  const OPUS_KEY = 'claude-opus-4';

  // Name variants shared by the token-rate and cache-rate variation tests.
  const modelVariations = [
    'claude-sonnet-4',
    'claude-sonnet-4-20240229',
    'claude-sonnet-4-latest',
    'anthropic/claude-sonnet-4',
    'claude-sonnet-4/anthropic',
    'claude-sonnet-4-preview',
    'claude-sonnet-4-20240229-preview',
    'claude-opus-4',
    'claude-opus-4-20240229',
    'claude-opus-4-latest',
    'anthropic/claude-opus-4',
    'claude-opus-4/anthropic',
    'claude-opus-4-preview',
    'claude-opus-4-20240229-preview',
  ];

  /** Picks the key a variant should resolve to: names containing "sonnet" map to the sonnet key. */
  const expectedKeyFor = (model) => (model.includes('sonnet') ? SONNET_KEY : OPUS_KEY);

  it('should return correct prompt and completion rates for Claude 4 models', () => {
    for (const model of [SONNET_KEY, OPUS_KEY]) {
      for (const tokenType of ['prompt', 'completion']) {
        expect(getMultiplier({ model, tokenType })).toBe(tokenValues[model][tokenType]);
      }
    }
  });

  it('should handle Claude 4 model name variations with different prefixes and suffixes', () => {
    for (const model of modelVariations) {
      const valueKey = getValueKey(model);
      const expectedKey = expectedKeyFor(model);

      expect(valueKey).toBe(expectedKey);
      for (const tokenType of ['prompt', 'completion']) {
        expect(getMultiplier({ model, tokenType })).toBe(tokenValues[expectedKey][tokenType]);
      }
    }
  });

  it('should return correct cache rates for Claude 4 models', () => {
    for (const model of [SONNET_KEY, OPUS_KEY]) {
      for (const cacheType of ['write', 'read']) {
        expect(getCacheMultiplier({ model, cacheType })).toBe(cacheTokenValues[model][cacheType]);
      }
    }
  });

  it('should handle Claude 4 model cache rates with different prefixes and suffixes', () => {
    for (const model of modelVariations) {
      const expectedKey = expectedKeyFor(model);

      for (const cacheType of ['write', 'read']) {
        expect(getCacheMultiplier({ model, cacheType })).toBe(
          cacheTokenValues[expectedKey][cacheType],
        );
      }
    }
  });
});
|