2023-12-14 07:49:27 -05:00
|
|
|
const { logger } = require('~/config');
|
2025-05-30 22:18:13 -04:00
|
|
|
const { createTransaction, createStructuredTransaction } = require('./Transaction');
|
feat: Accurate Token Usage Tracking & Optional Balance (#1018)
* refactor(Chains/llms): allow passing callbacks
* refactor(BaseClient): accurately count completion tokens as generation only
* refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM
* wip: summary prompt tokens
* refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end
wip: draft out relevant providers and variables for token tracing
* refactor(createLLM): make streaming prop false by default
* chore: remove use of getTokenCountForResponse
* refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace
* chore: remove passing of streaming prop, also console log useful vars for tracing
* feat: formatFromLangChain helper function to count tokens for ChatModelStart
* refactor(initializeLLM): add role for LLM tracing
* chore(formatFromLangChain): update JSDoc
* feat(formatMessages): formats langChain messages into OpenAI payload format
* chore: install openai-chat-tokens
* refactor(formatMessage): optimize conditional langChain logic
fix(formatFromLangChain): fix destructuring
* feat: accurate prompt tokens for ChatModelStart before generation
* refactor(handleChatModelStart): move to callbacks dir, use factory function
* refactor(initializeLLM): rename 'role' to 'context'
* feat(Balance/Transaction): new schema/models for tracking token spend
refactor(Key): factor out model export to separate file
* refactor(initializeClient): add req,res objects to client options
* feat: add-balance script to add to an existing users' token balance
refactor(Transaction): use multiplier map/function, return balance update
* refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match
* refactor(Tx): add fair fallback value multiplier incase the config result is undefined
* refactor(Balance): rename 'tokens' to 'tokenCredits'
* feat: balance check, add tx.js for new tx-related methods and tests
* chore(summaryPrompts): update prompt token count
* refactor(callbacks): pass req, res
wip: check balance
* refactor(Tx): make convoId a String type, fix(calculateTokenValue)
* refactor(BaseClient): add conversationId as client prop when assigned
* feat(RunManager): track LLM runs with manager, track token spend from LLM,
refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls
* feat(spendTokens): helper to spend prompt/completion tokens
* feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds
refactor(Balance): static check method to return object instead of boolean now
wip(OpenAIClient): implement use of checkBalance
* refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large
refactor(OpenAIClient): add checkBalance
refactor(createStartHandler): add checkBalance
* chore: remove prompt and completion token logging from route handler
* chore(spendTokens): add JSDoc
* feat(logTokenCost): record transactions for basic api calls
* chore(ask/edit): invoke getResponseSender only once per API call
* refactor(ask/edit): pass promptTokens to getIds and include in abort data
* refactor(getIds -> getReqData): rename function
* refactor(Tx): increase value if incomplete message
* feat: record tokenUsage when message is aborted
* refactor: subtract tokens when payload includes function_call
* refactor: add namespace for token_balance
* fix(spendTokens): only execute if corresponding token type amounts are defined
* refactor(checkBalance): throws Error if not enough token credits
* refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run'
* fix(abortMiddleware): circular dependency, and default to empty string for completionTokens
* fix: properly cancel title requests when there isn't enough tokens to generate
* feat(predictNewSummary): custom chain for summaries to allow signal passing
refactor(summaryBuffer): use new custom chain
* feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError
* refactor(createStartHandler): if summary, add error details to runs
* fix(OpenAIClient): support aborting from summarization & showing error to user
refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer
* refactor(logTokenCost -> recordTokenUsage): rename
* refactor(checkBalance): include promptTokens in errorMessage
* refactor(checkBalance/spendTokens): move to models dir
* fix(createLanguageChain): correctly pass config
* refactor(initializeLLM/title): add tokenBuffer of 150 for balance check
* refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called
* refactor(createStartHandler): add error to run if context is plugins as well
* refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run
* refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic
* chore: use absolute equality for addTitle condition
* refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional
* style: icon changes to match official
* fix(BaseClient): getTokenCountForResponse -> getTokenCount
* fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc
* refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled
* fix(e2e/cleanUp): cleanup new collections, import all model methods from index
* fix(config/add-balance): add uncaughtException listener
* fix: circular dependency
* refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance
* fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped
* fix(createStartHandler): correct condition for generations
* chore: bump postcss due to moderate severity vulnerability
* chore: bump zod due to low severity vulnerability
* chore: bump openai & data-provider version
* feat(types): OpenAI Message types
* chore: update bun lockfile
* refactor(CodeBlock): add error block formatting
* refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON
* chore(logViolation): delete user_id after error is logged
* refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex
* fix(DALL-E): use latest openai SDK
* chore: reorganize imports, fix type issue
* feat(server): add balance route
* fix(api/models): add auth
* feat(data-provider): /api/balance query
* feat: show balance if checking is enabled, refetch on final message or error
* chore: update docs, .env.example with token_usage info, add balance script command
* fix(Balance): fallback to empty obj for balance query
* style: slight adjustment of balance element
* docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
|
|
|
/**
|
|
|
|
* Creates up to two transactions to record the spending of tokens.
|
|
|
|
*
|
|
|
|
* @function
|
|
|
|
* @async
|
2025-08-26 12:10:18 -04:00
|
|
|
* @param {txData} txData - Transaction data.
|
feat: Accurate Token Usage Tracking & Optional Balance (#1018)
* refactor(Chains/llms): allow passing callbacks
* refactor(BaseClient): accurately count completion tokens as generation only
* refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM
* wip: summary prompt tokens
* refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end
wip: draft out relevant providers and variables for token tracing
* refactor(createLLM): make streaming prop false by default
* chore: remove use of getTokenCountForResponse
* refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace
* chore: remove passing of streaming prop, also console log useful vars for tracing
* feat: formatFromLangChain helper function to count tokens for ChatModelStart
* refactor(initializeLLM): add role for LLM tracing
* chore(formatFromLangChain): update JSDoc
* feat(formatMessages): formats langChain messages into OpenAI payload format
* chore: install openai-chat-tokens
* refactor(formatMessage): optimize conditional langChain logic
fix(formatFromLangChain): fix destructuring
* feat: accurate prompt tokens for ChatModelStart before generation
* refactor(handleChatModelStart): move to callbacks dir, use factory function
* refactor(initializeLLM): rename 'role' to 'context'
* feat(Balance/Transaction): new schema/models for tracking token spend
refactor(Key): factor out model export to separate file
* refactor(initializeClient): add req,res objects to client options
* feat: add-balance script to add to an existing users' token balance
refactor(Transaction): use multiplier map/function, return balance update
* refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match
* refactor(Tx): add fair fallback value multiplier incase the config result is undefined
* refactor(Balance): rename 'tokens' to 'tokenCredits'
* feat: balance check, add tx.js for new tx-related methods and tests
* chore(summaryPrompts): update prompt token count
* refactor(callbacks): pass req, res
wip: check balance
* refactor(Tx): make convoId a String type, fix(calculateTokenValue)
* refactor(BaseClient): add conversationId as client prop when assigned
* feat(RunManager): track LLM runs with manager, track token spend from LLM,
refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls
* feat(spendTokens): helper to spend prompt/completion tokens
* feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds
refactor(Balance): static check method to return object instead of boolean now
wip(OpenAIClient): implement use of checkBalance
* refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large
refactor(OpenAIClient): add checkBalance
refactor(createStartHandler): add checkBalance
* chore: remove prompt and completion token logging from route handler
* chore(spendTokens): add JSDoc
* feat(logTokenCost): record transactions for basic api calls
* chore(ask/edit): invoke getResponseSender only once per API call
* refactor(ask/edit): pass promptTokens to getIds and include in abort data
* refactor(getIds -> getReqData): rename function
* refactor(Tx): increase value if incomplete message
* feat: record tokenUsage when message is aborted
* refactor: subtract tokens when payload includes function_call
* refactor: add namespace for token_balance
* fix(spendTokens): only execute if corresponding token type amounts are defined
* refactor(checkBalance): throws Error if not enough token credits
* refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run'
* fix(abortMiddleware): circular dependency, and default to empty string for completionTokens
* fix: properly cancel title requests when there isn't enough tokens to generate
* feat(predictNewSummary): custom chain for summaries to allow signal passing
refactor(summaryBuffer): use new custom chain
* feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError
* refactor(createStartHandler): if summary, add error details to runs
* fix(OpenAIClient): support aborting from summarization & showing error to user
refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer
* refactor(logTokenCost -> recordTokenUsage): rename
* refactor(checkBalance): include promptTokens in errorMessage
* refactor(checkBalance/spendTokens): move to models dir
* fix(createLanguageChain): correctly pass config
* refactor(initializeLLM/title): add tokenBuffer of 150 for balance check
* refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called
* refactor(createStartHandler): add error to run if context is plugins as well
* refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run
* refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic
* chore: use absolute equality for addTitle condition
* refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional
* style: icon changes to match official
* fix(BaseClient): getTokenCountForResponse -> getTokenCount
* fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc
* refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled
* fix(e2e/cleanUp): cleanup new collections, import all model methods from index
* fix(config/add-balance): add uncaughtException listener
* fix: circular dependency
* refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance
* fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped
* fix(createStartHandler): correct condition for generations
* chore: bump postcss due to moderate severity vulnerability
* chore: bump zod due to low severity vulnerability
* chore: bump openai & data-provider version
* feat(types): OpenAI Message types
* chore: update bun lockfile
* refactor(CodeBlock): add error block formatting
* refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON
* chore(logViolation): delete user_id after error is logged
* refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex
* fix(DALL-E): use latest openai SDK
* chore: reorganize imports, fix type issue
* feat(server): add balance route
* fix(api/models): add auth
* feat(data-provider): /api/balance query
* feat: show balance if checking is enabled, refetch on final message or error
* chore: update docs, .env.example with token_usage info, add balance script command
* fix(Balance): fallback to empty obj for balance query
* style: slight adjustment of balance element
* docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
|
|
|
* @param {Object} tokenUsage - The number of tokens used.
|
|
|
|
* @param {Number} tokenUsage.promptTokens - The number of prompt tokens used.
|
|
|
|
* @param {Number} tokenUsage.completionTokens - The number of completion tokens used.
|
|
|
|
* @returns {Promise<void>} - Returns nothing.
|
|
|
|
* @throws {Error} - Throws an error if there's an issue creating the transactions.
|
|
|
|
*/
|
|
|
|
const spendTokens = async (txData, tokenUsage) => {
|
|
|
|
const { promptTokens, completionTokens } = tokenUsage;
|
2024-03-23 20:21:40 -04:00
|
|
|
logger.debug(
|
|
|
|
`[spendTokens] conversationId: ${txData.conversationId}${
|
|
|
|
txData?.context ? ` | Context: ${txData?.context}` : ''
|
|
|
|
} | Token usage: `,
|
|
|
|
{
|
|
|
|
promptTokens,
|
|
|
|
completionTokens,
|
|
|
|
},
|
|
|
|
);
|
feat: Accurate Token Usage Tracking & Optional Balance (#1018)
* refactor(Chains/llms): allow passing callbacks
* refactor(BaseClient): accurately count completion tokens as generation only
* refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM
* wip: summary prompt tokens
* refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end
wip: draft out relevant providers and variables for token tracing
* refactor(createLLM): make streaming prop false by default
* chore: remove use of getTokenCountForResponse
* refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace
* chore: remove passing of streaming prop, also console log useful vars for tracing
* feat: formatFromLangChain helper function to count tokens for ChatModelStart
* refactor(initializeLLM): add role for LLM tracing
* chore(formatFromLangChain): update JSDoc
* feat(formatMessages): formats langChain messages into OpenAI payload format
* chore: install openai-chat-tokens
* refactor(formatMessage): optimize conditional langChain logic
fix(formatFromLangChain): fix destructuring
* feat: accurate prompt tokens for ChatModelStart before generation
* refactor(handleChatModelStart): move to callbacks dir, use factory function
* refactor(initializeLLM): rename 'role' to 'context'
* feat(Balance/Transaction): new schema/models for tracking token spend
refactor(Key): factor out model export to separate file
* refactor(initializeClient): add req,res objects to client options
* feat: add-balance script to add to an existing users' token balance
refactor(Transaction): use multiplier map/function, return balance update
* refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match
* refactor(Tx): add fair fallback value multiplier incase the config result is undefined
* refactor(Balance): rename 'tokens' to 'tokenCredits'
* feat: balance check, add tx.js for new tx-related methods and tests
* chore(summaryPrompts): update prompt token count
* refactor(callbacks): pass req, res
wip: check balance
* refactor(Tx): make convoId a String type, fix(calculateTokenValue)
* refactor(BaseClient): add conversationId as client prop when assigned
* feat(RunManager): track LLM runs with manager, track token spend from LLM,
refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls
* feat(spendTokens): helper to spend prompt/completion tokens
* feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds
refactor(Balance): static check method to return object instead of boolean now
wip(OpenAIClient): implement use of checkBalance
* refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large
refactor(OpenAIClient): add checkBalance
refactor(createStartHandler): add checkBalance
* chore: remove prompt and completion token logging from route handler
* chore(spendTokens): add JSDoc
* feat(logTokenCost): record transactions for basic api calls
* chore(ask/edit): invoke getResponseSender only once per API call
* refactor(ask/edit): pass promptTokens to getIds and include in abort data
* refactor(getIds -> getReqData): rename function
* refactor(Tx): increase value if incomplete message
* feat: record tokenUsage when message is aborted
* refactor: subtract tokens when payload includes function_call
* refactor: add namespace for token_balance
* fix(spendTokens): only execute if corresponding token type amounts are defined
* refactor(checkBalance): throws Error if not enough token credits
* refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run'
* fix(abortMiddleware): circular dependency, and default to empty string for completionTokens
* fix: properly cancel title requests when there isn't enough tokens to generate
* feat(predictNewSummary): custom chain for summaries to allow signal passing
refactor(summaryBuffer): use new custom chain
* feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError
* refactor(createStartHandler): if summary, add error details to runs
* fix(OpenAIClient): support aborting from summarization & showing error to user
refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer
* refactor(logTokenCost -> recordTokenUsage): rename
* refactor(checkBalance): include promptTokens in errorMessage
* refactor(checkBalance/spendTokens): move to models dir
* fix(createLanguageChain): correctly pass config
* refactor(initializeLLM/title): add tokenBuffer of 150 for balance check
* refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called
* refactor(createStartHandler): add error to run if context is plugins as well
* refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run
* refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic
* chore: use absolute equality for addTitle condition
* refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional
* style: icon changes to match official
* fix(BaseClient): getTokenCountForResponse -> getTokenCount
* fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc
* refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled
* fix(e2e/cleanUp): cleanup new collections, import all model methods from index
* fix(config/add-balance): add uncaughtException listener
* fix: circular dependency
* refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance
* fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped
* fix(createStartHandler): correct condition for generations
* chore: bump postcss due to moderate severity vulnerability
* chore: bump zod due to low severity vulnerability
* chore: bump openai & data-provider version
* feat(types): OpenAI Message types
* chore: update bun lockfile
* refactor(CodeBlock): add error block formatting
* refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON
* chore(logViolation): delete user_id after error is logged
* refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex
* fix(DALL-E): use latest openai SDK
* chore: reorganize imports, fix type issue
* feat(server): add balance route
* fix(api/models): add auth
* feat(data-provider): /api/balance query
* feat: show balance if checking is enabled, refetch on final message or error
* chore: update docs, .env.example with token_usage info, add balance script command
* fix(Balance): fallback to empty obj for balance query
* style: slight adjustment of balance element
* docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
|
|
|
let prompt, completion;
|
|
|
|
try {
|
2024-08-24 04:36:08 -04:00
|
|
|
if (promptTokens !== undefined) {
|
2025-05-30 22:18:13 -04:00
|
|
|
prompt = await createTransaction({
|
feat: Accurate Token Usage Tracking & Optional Balance (#1018)
* refactor(Chains/llms): allow passing callbacks
* refactor(BaseClient): accurately count completion tokens as generation only
* refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM
* wip: summary prompt tokens
* refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end
wip: draft out relevant providers and variables for token tracing
* refactor(createLLM): make streaming prop false by default
* chore: remove use of getTokenCountForResponse
* refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace
* chore: remove passing of streaming prop, also console log useful vars for tracing
* feat: formatFromLangChain helper function to count tokens for ChatModelStart
* refactor(initializeLLM): add role for LLM tracing
* chore(formatFromLangChain): update JSDoc
* feat(formatMessages): formats langChain messages into OpenAI payload format
* chore: install openai-chat-tokens
* refactor(formatMessage): optimize conditional langChain logic
fix(formatFromLangChain): fix destructuring
* feat: accurate prompt tokens for ChatModelStart before generation
* refactor(handleChatModelStart): move to callbacks dir, use factory function
* refactor(initializeLLM): rename 'role' to 'context'
* feat(Balance/Transaction): new schema/models for tracking token spend
refactor(Key): factor out model export to separate file
* refactor(initializeClient): add req,res objects to client options
* feat: add-balance script to add to an existing users' token balance
refactor(Transaction): use multiplier map/function, return balance update
* refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match
* refactor(Tx): add fair fallback value multiplier incase the config result is undefined
* refactor(Balance): rename 'tokens' to 'tokenCredits'
* feat: balance check, add tx.js for new tx-related methods and tests
* chore(summaryPrompts): update prompt token count
* refactor(callbacks): pass req, res
wip: check balance
* refactor(Tx): make convoId a String type, fix(calculateTokenValue)
* refactor(BaseClient): add conversationId as client prop when assigned
* feat(RunManager): track LLM runs with manager, track token spend from LLM,
refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls
* feat(spendTokens): helper to spend prompt/completion tokens
* feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds
refactor(Balance): static check method to return object instead of boolean now
wip(OpenAIClient): implement use of checkBalance
* refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large
refactor(OpenAIClient): add checkBalance
refactor(createStartHandler): add checkBalance
* chore: remove prompt and completion token logging from route handler
* chore(spendTokens): add JSDoc
* feat(logTokenCost): record transactions for basic api calls
* chore(ask/edit): invoke getResponseSender only once per API call
* refactor(ask/edit): pass promptTokens to getIds and include in abort data
* refactor(getIds -> getReqData): rename function
* refactor(Tx): increase value if incomplete message
* feat: record tokenUsage when message is aborted
* refactor: subtract tokens when payload includes function_call
* refactor: add namespace for token_balance
* fix(spendTokens): only execute if corresponding token type amounts are defined
* refactor(checkBalance): throws Error if not enough token credits
* refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run'
* fix(abortMiddleware): circular dependency, and default to empty string for completionTokens
* fix: properly cancel title requests when there isn't enough tokens to generate
* feat(predictNewSummary): custom chain for summaries to allow signal passing
refactor(summaryBuffer): use new custom chain
* feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError
* refactor(createStartHandler): if summary, add error details to runs
* fix(OpenAIClient): support aborting from summarization & showing error to user
refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer
* refactor(logTokenCost -> recordTokenUsage): rename
* refactor(checkBalance): include promptTokens in errorMessage
* refactor(checkBalance/spendTokens): move to models dir
* fix(createLanguageChain): correctly pass config
* refactor(initializeLLM/title): add tokenBuffer of 150 for balance check
* refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called
* refactor(createStartHandler): add error to run if context is plugins as well
* refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run
* refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic
* chore: use absolute equality for addTitle condition
* refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional
* style: icon changes to match official
* fix(BaseClient): getTokenCountForResponse -> getTokenCount
* fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc
* refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled
* fix(e2e/cleanUp): cleanup new collections, import all model methods from index
* fix(config/add-balance): add uncaughtException listener
* fix: circular dependency
* refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance
* fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped
* fix(createStartHandler): correct condition for generations
* chore: bump postcss due to moderate severity vulnerability
* chore: bump zod due to low severity vulnerability
* chore: bump openai & data-provider version
* feat(types): OpenAI Message types
* chore: update bun lockfile
* refactor(CodeBlock): add error block formatting
* refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON
* chore(logViolation): delete user_id after error is logged
* refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex
* fix(DALL-E): use latest openai SDK
* chore: reorganize imports, fix type issue
* feat(server): add balance route
* fix(api/models): add auth
* feat(data-provider): /api/balance query
* feat: show balance if checking is enabled, refetch on final message or error
* chore: update docs, .env.example with token_usage info, add balance script command
* fix(Balance): fallback to empty obj for balance query
* style: slight adjustment of balance element
* docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
|
|
|
...txData,
|
|
|
|
tokenType: 'prompt',
|
2025-03-22 17:54:25 -04:00
|
|
|
rawAmount: promptTokens === 0 ? 0 : -Math.max(promptTokens, 0),
|
feat: Accurate Token Usage Tracking & Optional Balance (#1018)
* refactor(Chains/llms): allow passing callbacks
* refactor(BaseClient): accurately count completion tokens as generation only
* refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM
* wip: summary prompt tokens
* refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end
wip: draft out relevant providers and variables for token tracing
* refactor(createLLM): make streaming prop false by default
* chore: remove use of getTokenCountForResponse
* refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace
* chore: remove passing of streaming prop, also console log useful vars for tracing
* feat: formatFromLangChain helper function to count tokens for ChatModelStart
* refactor(initializeLLM): add role for LLM tracing
* chore(formatFromLangChain): update JSDoc
* feat(formatMessages): formats langChain messages into OpenAI payload format
* chore: install openai-chat-tokens
* refactor(formatMessage): optimize conditional langChain logic
fix(formatFromLangChain): fix destructuring
* feat: accurate prompt tokens for ChatModelStart before generation
* refactor(handleChatModelStart): move to callbacks dir, use factory function
* refactor(initializeLLM): rename 'role' to 'context'
* feat(Balance/Transaction): new schema/models for tracking token spend
refactor(Key): factor out model export to separate file
* refactor(initializeClient): add req,res objects to client options
* feat: add-balance script to add to an existing users' token balance
refactor(Transaction): use multiplier map/function, return balance update
* refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match
* refactor(Tx): add fair fallback value multiplier incase the config result is undefined
* refactor(Balance): rename 'tokens' to 'tokenCredits'
* feat: balance check, add tx.js for new tx-related methods and tests
* chore(summaryPrompts): update prompt token count
* refactor(callbacks): pass req, res
wip: check balance
* refactor(Tx): make convoId a String type, fix(calculateTokenValue)
* refactor(BaseClient): add conversationId as client prop when assigned
* feat(RunManager): track LLM runs with manager, track token spend from LLM,
refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls
* feat(spendTokens): helper to spend prompt/completion tokens
* feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds
refactor(Balance): static check method to return object instead of boolean now
wip(OpenAIClient): implement use of checkBalance
* refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large
refactor(OpenAIClient): add checkBalance
refactor(createStartHandler): add checkBalance
* chore: remove prompt and completion token logging from route handler
* chore(spendTokens): add JSDoc
* feat(logTokenCost): record transactions for basic api calls
* chore(ask/edit): invoke getResponseSender only once per API call
* refactor(ask/edit): pass promptTokens to getIds and include in abort data
* refactor(getIds -> getReqData): rename function
* refactor(Tx): increase value if incomplete message
* feat: record tokenUsage when message is aborted
* refactor: subtract tokens when payload includes function_call
* refactor: add namespace for token_balance
* fix(spendTokens): only execute if corresponding token type amounts are defined
* refactor(checkBalance): throws Error if not enough token credits
* refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run'
* fix(abortMiddleware): circular dependency, and default to empty string for completionTokens
* fix: properly cancel title requests when there isn't enough tokens to generate
* feat(predictNewSummary): custom chain for summaries to allow signal passing
refactor(summaryBuffer): use new custom chain
* feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError
* refactor(createStartHandler): if summary, add error details to runs
* fix(OpenAIClient): support aborting from summarization & showing error to user
refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer
* refactor(logTokenCost -> recordTokenUsage): rename
* refactor(checkBalance): include promptTokens in errorMessage
* refactor(checkBalance/spendTokens): move to models dir
* fix(createLanguageChain): correctly pass config
* refactor(initializeLLM/title): add tokenBuffer of 150 for balance check
* refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called
* refactor(createStartHandler): add error to run if context is plugins as well
* refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run
* refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic
* chore: use absolute equality for addTitle condition
* refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional
* style: icon changes to match official
* fix(BaseClient): getTokenCountForResponse -> getTokenCount
* fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc
* refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled
* fix(e2e/cleanUp): cleanup new collections, import all model methods from index
* fix(config/add-balance): add uncaughtException listener
* fix: circular dependency
* refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance
* fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped
* fix(createStartHandler): correct condition for generations
* chore: bump postcss due to moderate severity vulnerability
* chore: bump zod due to low severity vulnerability
* chore: bump openai & data-provider version
* feat(types): OpenAI Message types
* chore: update bun lockfile
* refactor(CodeBlock): add error block formatting
* refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON
* chore(logViolation): delete user_id after error is logged
* refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex
* fix(DALL-E): use latest openai SDK
* chore: reorganize imports, fix type issue
* feat(server): add balance route
* fix(api/models): add auth
* feat(data-provider): /api/balance query
* feat: show balance if checking is enabled, refetch on final message or error
* chore: update docs, .env.example with token_usage info, add balance script command
* fix(Balance): fallback to empty obj for balance query
* style: slight adjustment of balance element
* docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2024-08-24 04:36:08 -04:00
|
|
|
if (completionTokens !== undefined) {
|
2025-05-30 22:18:13 -04:00
|
|
|
completion = await createTransaction({
|
2024-08-24 04:36:08 -04:00
|
|
|
...txData,
|
|
|
|
tokenType: 'completion',
|
2025-03-22 17:54:25 -04:00
|
|
|
rawAmount: completionTokens === 0 ? 0 : -Math.max(completionTokens, 0),
|
2024-08-24 04:36:08 -04:00
|
|
|
});
|
feat: Accurate Token Usage Tracking & Optional Balance (#1018)
* refactor(Chains/llms): allow passing callbacks
* refactor(BaseClient): accurately count completion tokens as generation only
* refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM
* wip: summary prompt tokens
* refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end
wip: draft out relevant providers and variables for token tracing
* refactor(createLLM): make streaming prop false by default
* chore: remove use of getTokenCountForResponse
* refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace
* chore: remove passing of streaming prop, also console log useful vars for tracing
* feat: formatFromLangChain helper function to count tokens for ChatModelStart
* refactor(initializeLLM): add role for LLM tracing
* chore(formatFromLangChain): update JSDoc
* feat(formatMessages): formats langChain messages into OpenAI payload format
* chore: install openai-chat-tokens
* refactor(formatMessage): optimize conditional langChain logic
fix(formatFromLangChain): fix destructuring
* feat: accurate prompt tokens for ChatModelStart before generation
* refactor(handleChatModelStart): move to callbacks dir, use factory function
* refactor(initializeLLM): rename 'role' to 'context'
* feat(Balance/Transaction): new schema/models for tracking token spend
refactor(Key): factor out model export to separate file
* refactor(initializeClient): add req,res objects to client options
* feat: add-balance script to add to an existing users' token balance
refactor(Transaction): use multiplier map/function, return balance update
* refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match
* refactor(Tx): add fair fallback value multiplier incase the config result is undefined
* refactor(Balance): rename 'tokens' to 'tokenCredits'
* feat: balance check, add tx.js for new tx-related methods and tests
* chore(summaryPrompts): update prompt token count
* refactor(callbacks): pass req, res
wip: check balance
* refactor(Tx): make convoId a String type, fix(calculateTokenValue)
* refactor(BaseClient): add conversationId as client prop when assigned
* feat(RunManager): track LLM runs with manager, track token spend from LLM,
refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls
* feat(spendTokens): helper to spend prompt/completion tokens
* feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds
refactor(Balance): static check method to return object instead of boolean now
wip(OpenAIClient): implement use of checkBalance
* refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large
refactor(OpenAIClient): add checkBalance
refactor(createStartHandler): add checkBalance
* chore: remove prompt and completion token logging from route handler
* chore(spendTokens): add JSDoc
* feat(logTokenCost): record transactions for basic api calls
* chore(ask/edit): invoke getResponseSender only once per API call
* refactor(ask/edit): pass promptTokens to getIds and include in abort data
* refactor(getIds -> getReqData): rename function
* refactor(Tx): increase value if incomplete message
* feat: record tokenUsage when message is aborted
* refactor: subtract tokens when payload includes function_call
* refactor: add namespace for token_balance
* fix(spendTokens): only execute if corresponding token type amounts are defined
* refactor(checkBalance): throws Error if not enough token credits
* refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run'
* fix(abortMiddleware): circular dependency, and default to empty string for completionTokens
* fix: properly cancel title requests when there isn't enough tokens to generate
* feat(predictNewSummary): custom chain for summaries to allow signal passing
refactor(summaryBuffer): use new custom chain
* feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError
* refactor(createStartHandler): if summary, add error details to runs
* fix(OpenAIClient): support aborting from summarization & showing error to user
refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer
* refactor(logTokenCost -> recordTokenUsage): rename
* refactor(checkBalance): include promptTokens in errorMessage
* refactor(checkBalance/spendTokens): move to models dir
* fix(createLanguageChain): correctly pass config
* refactor(initializeLLM/title): add tokenBuffer of 150 for balance check
* refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called
* refactor(createStartHandler): add error to run if context is plugins as well
* refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run
* refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic
* chore: use absolute equality for addTitle condition
* refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional
* style: icon changes to match official
* fix(BaseClient): getTokenCountForResponse -> getTokenCount
* fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc
* refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled
* fix(e2e/cleanUp): cleanup new collections, import all model methods from index
* fix(config/add-balance): add uncaughtException listener
* fix: circular dependency
* refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance
* fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped
* fix(createStartHandler): correct condition for generations
* chore: bump postcss due to moderate severity vulnerability
* chore: bump zod due to low severity vulnerability
* chore: bump openai & data-provider version
* feat(types): OpenAI Message types
* chore: update bun lockfile
* refactor(CodeBlock): add error block formatting
* refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON
* chore(logViolation): delete user_id after error is logged
* refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex
* fix(DALL-E): use latest openai SDK
* chore: reorganize imports, fix type issue
* feat(server): add balance route
* fix(api/models): add auth
* feat(data-provider): /api/balance query
* feat: show balance if checking is enabled, refetch on final message or error
* chore: update docs, .env.example with token_usage info, add balance script command
* fix(Balance): fallback to empty obj for balance query
* style: slight adjustment of balance element
* docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
|
|
|
}
|
|
|
|
|
2024-08-24 04:36:08 -04:00
|
|
|
if (prompt || completion) {
|
✨ feat: Assistants API, General File Support, Side Panel, File Explorer (#1696)
* feat: assistant name/icon in Landing & Header
* feat: assistname in textarea placeholder, and use `Assistant` as default name
* feat: display non-image files in user messages
* fix: only render files if files.length is > 0
* refactor(config -> file-config): move file related configuration values to separate module, add excel types
* chore: spreadsheet file rendering
* fix(Landing): dark mode style for Assistant Name
* refactor: move progress incrementing to own hook, start smaller, cap near limit \(1\)
* refactor(useContentHandler): add empty Text part if last part was completed tool or image
* chore: add accordion trigger border styling for dark mode
* feat: Assistant Builder model selection
* chore: use Spinner when Assistant is mutating
* fix(get/assistants): return correct response object `AssistantListResponse`
* refactor(Spinner): pass size as prop
* refactor: make assistant crud mutations optimistic, add types for options
* chore: remove assistants route and view
* chore: move assistant builder components to separate directory
* feat(ContextButton): delete Assistant via context button/dialog, add localization
* refactor: conditionally show use and context menu buttons, add localization for create assistant
* feat: save side panel states to localStorage
* style(SidePanel): improve avatar menu and assistant select styling for dark mode
* refactor: make NavToggle reusable for either side (left or right), add SidePanel Toggle with ability to close it completely
* fix: resize handle and navToggle behavior
* fix(/avatar/:assistant_id): await `deleteFile` and assign unique name to uploaded image
* WIP: file UI components from PR #576
* refactor(OpenAIMinimalIcon): pass className
* feat: formatDate helper fn
* feat: DataTableColumnHeader
* feat: add row selection, formatted row values, number of rows selected
* WIP: add files to Side panel temporarily
* feat: `LB_QueueAsyncCall`: Leaky Bucket queue for external APIs, use in `processDeleteRequest`
* fix(TFile): correct `source` type with `FileSources`
* fix(useFileHandling): use `continue` instead of return when iterating multiple files, add file type to extendedFile
* chore: add generic setter type
* refactor(processDeleteRequest): settle promises to prevent rejections from processing deletions, log errors
* feat: `useFileDeletion` to reuse file deletion logic
* refactor(useFileDeletion): make `setFiles` an optional param and use object as param
* feat: useDeleteFilesFromTable
* feat: use real `files` data and add deletion action to data table
* fix(Table): make headers sticky
* feat: add dynamic filtering for columns; only show to user Host or OpenAI storage type
* style(DropdownMenu): replace `slate` with `gray`
* style(DataTable): apply dark mode themes and other misc styling
* style(Columns): add color to OpenAI Storage option
* refactor(FileContainer): make file preview reusable
* refactor(Images): make image preview reusable
* refactor(FilePreview): make file prop optional for FileIcon and FilePreview, fix relative style
* feat(Columns): add file/image previews, set a minimum size to show for file size in bytes
* WIP: File Panel with real files and formatted
* feat: open files dialog from panel
* style: file data table mobile and general column styling fixes
* refactor(api/files): return files sorted by the most recently updated
* refactor: provide fileMap through context to prevent re-selecting files to map in different areas; remove unused imports commented out in PanelColumns
* refactor(ExtendFile): make File type optional, add `attached` to prevent attached files from being deleted on remove, make Message.files a partial TFile type
* feat: attach files through file panel
* refactor(useFileHandling): move files to the start of cache list when uploaded
* refactor(useDeleteFilesMutation): delete files from cache when successfully deleted from server
* fix(FileRow): handle possible edge case of duplication due to attaching recently uploaded file
* style(SidePanel): make resize grip border transparent, remove unnecessary styling on close sidepanel button
* feat: action utilities and tests
* refactor(actions): add `ValidationResult` type and change wording for no server URL found
* refactor(actions): check for empty server URL
* fix(data-provider): revert tsconfig to fix type issue resolution
* feat(client): first pass of actions input for assistants
* refactor(FunctionSignature): change method to output object instead of string
* refactor(models/Assistant): add actions field to schema, use searchParams object for methods, and add `getAssistant`
* feat: post actions input first pass
- create new Action document
- add actions to Assistant DB document
- create /action/:assistant_id POST route
- pass more props down from PanelSwitcher, derive assistant_id from switcher
- move privacy policy to ActionInput
- reset data on input change/validation
- add `useUpdateAction`
- conform FunctionSignature type to FunctionTool
- add action, assistant doc, update hook related types
* refactor: optimize assistant/actions relationship
- past domain in metadata as hostname and not a URL
- include domain in tool name
- add `getActions` for actions retrieval by user
- add `getAssistants` for assistant docs retrieval by user
- add `assistant_id` to Action schema
- move actions to own module as a subroute to `api/assistants`
- add `useGetActionsQuery` and `useGetAssistantDocsQuery` hooks
- fix Action type def
* feat: show assistant actions in assistant builder
* feat: switch to actions on action click, editing action styling
* fix: add Assistant state for builder panel to allow immediate selection of newly created assistants as well as retaining the current assistant when switching to a different panel within the builder
* refactor(SidePanel/NavToggle): offset less from right when SidePanel is completely collapsed
* chore: rename `processActions` -> `processRequiredActions`
* chore: rename Assistant API Action to RequiredAction
* refactor(actions): avoid nesting actual API params under generic `requestBody` to optimize LLM token usage
* fix(handleTools): avoid calling `validTool` if not defined, add optional param to skip the loading of specs, which throws an error in the context of assistants
* WIP: working first pass of toolCalls generated from openapi specs
* WIP: first pass ToolCall styling
* feat: programmatic iv encryption/decryption helpers
* fix: correct ActionAuth types/enums, and define type for AuthForm
* feat: encryption/decryption helpers for Action AuthMetadata
* refactor(getActions): remove sensitive fields from query response
* refactor(POST/actions): encrypt and remove sensitive fields from mutation response
* fix(ActionService): change ESM import to CJS
* feat: frontend auth handling for actions + optimistic update on action update/creation
* refactor(actions): use the correct variables and types for setAuth method
* refactor: POST /:assistant_id action can now handle updating an existing action, add `saved_auth_fields` to determine when user explicitly saves new auth creds. only send auth metadata if user explicitly saved fields
* refactor(createActionTool): catch errors and send back meaningful error message, add flag to `getActions` to determine whether to retrieve sensitive values or not
* refactor(ToolService): add `action` property to ToolCall PartMetadata to determine if the tool call was an action, fix parsing function name issue with actionDelimiter
* fix(ActionRequest): use URL class to correctly join endpoint parts for `execute` call
* feat: delete assistant actions
* refactor: conditionally show Available actions
* refactor: show `retrieval` and `code_interpreter` as Capabilities, swap `Switch` for `Checkbox`
* chore: remove shadow-stroke from messages
* WIP: first pass of Assistants Knowledge attachments
* refactor: remove AssistantsProvider in favor of FormProvider, fix selectedAssistant re-render bug, map Assistant file_ids to files via fileMap, initialize Knowledge component with mapped files if any exist
* fix: prevent deleting files on assistant file upload
* chore: remove console.log
* refactor(useUploadFileMutation): update files and assistants cache on upload
* chore: disable oauth option as not supported yet
* feat: cancel assistant runs
* refactor: initialize OpenAI client with helper function, resolve all related circular dependencies
* fix(DALL-E): initialization
* fix(process): openai client initialization
* fix: select an existing Assistant when the active one is deleted
* chore: allow attaching files for assistant endpoint, send back relevant OpenAI error message when uploading, deconstruct openAI initialization correctly, add `message_file` to formData when a file is attached to the message but not the assistant
* fix: add assistant_id on newConvo
* fix(initializeClient): import fix
* chore: swap setAssistant for setOption in useEffect
* fix(DALL-E): add processFileURL to loadTools call
* chore: add customConfig to debug logs
* feat: delete threads on convo delete
* chore: replace Assistants icon
* chore: remove console.dir() in `abortRun`
* feat(AssistantService): accumulate text values from run in openai.responseText
* feat: titling for assistants endpoint
* chore: move panel file components to appropriate directory, add file checks for attaching files, change icon for Attach Files
* refactor: add localizations to tools, plugins, add condition for adding/remove user plugins so tool selections don't affect this value
* chore: disable `import from url` action for now
* chore: remove textMimeTypes from default fileConfig for now
* fix: catch tool errors and send as outputs with error messages
* fix: React warning about button as descendant of button
* style: retrieval and cancelled icon
* WIP: pass isSubmitting to Parts, use InProgressCall to display cancelled tool calls correctly, show domain/function name
* fix(meilisearch): fix `postSaveHook` issue where indexing expects a mongo document, and join all text content parts for meili indexing
* ci: fix dall-e tests
* ci: fix client tests
* fix: button types in actions panel
* fix: plugin auth form persisting across tool selections
* fix(ci): update AppService spec with `loadAndFormatTools`
* fix(clearConvos): add id check earlier on
* refactor(AssistantAvatar): set previewURL dynamically when emtadata.avatar changes
* feat(assistants): addTitle cache setting
* fix(useSSE): resolve rebase conflicts
* fix: delete mutation
* style(SidePanel): make grip visible on active and hover, invisible otherwise
* ci: add data-provider tests to workflow, also update eslint/tsconfig to recognize specs, and add `text/csv` to fileConfig
* fix: handle edge case where auth object is undefined, and log errors
* refactor(actions): resolve schemas, add tests for resolving refs, import specs from separate file for tests
* chore: remove comment
* fix(ActionsInput): re-render bug when initializing states with action fields
* fix(patch/assistant): filter undefined tools
* chore: add logging for errors in assistants routes
* fix(updateAssistant): map actions to functions to avoid overwriting
* fix(actions): properly handle GET paths
* fix(convos): unhandled delete thread exception
* refactor(AssistantService): pass both thread_id and conversationId when sending intermediate assistant messages, remove `mapMessagesToSteps` from AssistantService
* refactor(useSSE): replace all messages with runMessages and pass latestMessageId to abortRun; fix(checkMessageGaps): include tool calls when syncing messages
* refactor(assistants/chat): invoke `createOnTextProgress` after thread creation
* chore: add typing
* style: sidepanel styling
* style: action tool call domain styling
* feat(assistants): default models, limit retrieval to certain models, add env variables to to env.example
* feat: assistants api key in EndpointService
* refactor: set assistant model to conversation on assistant switch
* refactor: set assistant model to conversation on assistant select from panel
* fix(retrieveAndProcessFile): catch attempt to download file with `assistant` purpose which is not allowed; add logging
* feat: retrieval styling, handling, and logging
* chore: rename ASSISTANTS_REVERSE_PROXY to ASSISTANTS_BASE_URL
* feat: FileContext for file metadata
* feat: context file mgmt and filtering
* style(Select): hover/rounded changes
* refactor: explicit conversation switch, endpoint dependent, through `useSelectAssistant`, which does not create new chat if current endpoint is assistant endpoint
* fix(AssistantAvatar): make empty previewURL if no avatar present
* refactor: side panel mobile styling
* style: merge tool and action section, optimize mobile styling for action/tool buttons
* fix: localStorage issues
* fix(useSelectAssistant): invoke react query hook directly in select hook as Map was not being updated in time
* style: light mode fixes
* fix: prevent sidepanel nav styling from shifting layout up
* refactor: change default layout (collapsed by default)
* style: mobile optimization of DataTable
* style: datatable
* feat: client-side hide right-side panel
* chore(useNewConvo): add partial typing for preset
* fix(useSelectAssistant): pass correct model name by using template as preset
* WIP: assistant presets
* refactor(ToolService): add native solution for `TavilySearchResults` and log tool output errors
* refactor: organize imports and use native TavilySearchResults
* fix(TavilySearchResults): stringify result
* fix(ToolCall): show tool call outputs when not an action
* chore: rename Prompt Prefix to custom instructions (in user facing text only)
* refactor(EditPresetDialog): Optimize setting title by debouncing, reset preset on dialog close to avoid state mixture
* feat: add `presetOverride` to overwrite active conversation settings when saving a Preset (relevant for client side updates only)
* feat: Assistant preset settings (client-side)
* fix(Switcher): only set assistant_id and model if current endpoint is Assistants
* feat: use `useDebouncedInput` for updating conversation settings, starting with EditPresetDialog title setting and Assistant instructions setting
* feat(Assistants): add instructions field to settings
* feat(chat/assistants): pass conversation settings to run body
* wip: begin localization and only allow actions if the assistant is created
* refactor(AssistantsPanel): knowledge localization, allow tools on creation
* feat: experimental: allow 'priming' values before assistant is created, that would normally require an assistant_id to be defined
* chore: trim console logs and make more meaningful
* chore: toast messages
* fix(ci): date test
* feat: create file when uploading Assistant Avatar
* feat: file upload rate limiting from custom config with dynamic file route initialization
* refactor: use file upload limiters on post routes only
* refactor(fileConfig): add endpoints field for endpoint specific fileconfigs, add mergeConfig function, add tests
* refactor: fileConfig route, dynamic multer instances used on all '/' and '/images' POST routes, data service and query hook
* feat: supportedMimeTypesSchema, test for array of regex
* feat: configurable file config limits
* chore: clarify assistants file knowledge prereq.
* chore(useTextarea): default to localized 'Assistant' if assistant name is empty
* feat: configurable file limits and toggle file upload per endpoint
* fix(useUploadFileMutation): prevent updating assistant.files cache if file upload is a message_file attachment
* fix(AssistantSelect): set last selected assistant only when timeout successfully runs
* refactor(queries): disable assistant queries if assistants endpoint is not enabled
* chore(Switcher): add localization
* chore: pluralize `assistant` for `EModelEndpoint key and value
* feat: show/hide assistant UI components based on endpoint availability; librechat.yaml config for disabling builder section and setting polling/timeout intervals
* fix(compactEndpointSchemas): use EModelEndpoint for schema access
* feat(runAssistant): use configured values from `librechat.yaml` for `pollIntervalMs` and `timeout`
* fix: naming issue
* wip: revert landing
* 🎉 happy birthday LibreChat (#1768)
* happy birthday LibreChat
* Refactor endpoint condition in Landing component
* Update birthday message in Eng.tsx
* fix(/config): avoid nesting ternaries
* refactor(/config): check birthday
---------
Co-authored-by: Danny Avila <messagedaniel@protonmail.com>
* fix: landing
* fix: landing
* fix(useMessageHelpers): hardcoded check to use EModelEndpoint instead
* fix(ci): convo test revert to main
* fix(assistants/chat): fix issue where assistant_id was being saved as model for convo
* chore: added logging, promises racing to prevent longer timeouts, explicit setting of maxRetries and timeouts, robust catching of invalid abortRun params
* refactor: use recoil state for `showStopButton` and only show for assistants endpoint after syncing conversation data
* refactor: optimize abortRun strategy using localStorage, refactor `abortConversation` to use async/await and await the result, refactor how the abortKey cache is set for runs
* fix(checkMessageGaps): assign `assistant_id` to synced messages if defined; prevents UI from showing blank assistant for cancelled messages
* refactor: re-order sequence of chat route, only allow aborting messages after run is created, cancel abortRun if there was a cancelling error (likely due already cancelled in chat route), and add extra logging
* chore(typedefs): add httpAgent type to OpenAIClient
* refactor: use custom implementation of retrieving run with axios to allow for timing out run query
* fix(waitForRun): handle timed out run retrieval query
* refactor: update preset conditions:
- presets will retain settings when a different endpoint is selected; for existing convos, either when modular or is assistant switch
- no longer use `navigateToConvo` on preset select
* fix: temporary calculator hack as expects string input when invoked
* fix: cancel abortRun only when cancelling error is a result of the run already being cancelled
* chore: remove use of `fileMaxSizeMB` and total counterpart (redundant)
* docs: custom config documentation update
* docs: assistants api setup and dotenv, new custom config fields
* refactor(Switcher): make Assistant switcher sticky in SidePanel
* chore(useSSE): remove console log of data and message index
* refactor(AssistantPanel): button styling and add secondary select button to bottom of panel
* refactor(OpenAIClient): allow passing conversationId to RunManager through titleConvo and initializeLLM to properly record title context tokens used in cases where conversationId was not defined by the client
* feat(assistants): token tracking for assistant runs
* chore(spendTokens): improve logging
* feat: support/exclude specific assistant Ids
* chore: add update `librechat.example.yaml`, optimize `AppService` handling, new tests for `AppService`, optimize missing/outdate config logging
* chore: mount docker logs to root of project
* chore: condense axios errors
* chore: bump vite
* chore: vite hot reload fix using latest version
* chore(getOpenAIModels): sort instruct models to the end of models list
* fix(assistants): user provided key
* fix(assistants): user provided key, invalidate more queries on revoke
---------
Co-authored-by: Marco Beretta <81851188+Berry-13@users.noreply.github.com>
2024-02-13 20:42:27 -05:00
|
|
|
logger.debug('[spendTokens] Transaction data record against balance:', {
|
2024-04-20 15:02:56 -04:00
|
|
|
user: txData.user,
|
2024-08-24 04:36:08 -04:00
|
|
|
prompt: prompt?.prompt,
|
|
|
|
promptRate: prompt?.rate,
|
|
|
|
completion: completion?.completion,
|
|
|
|
completionRate: completion?.rate,
|
|
|
|
balance: completion?.balance ?? prompt?.balance,
|
✨ feat: Assistants API, General File Support, Side Panel, File Explorer (#1696)
* feat: assistant name/icon in Landing & Header
* feat: assistname in textarea placeholder, and use `Assistant` as default name
* feat: display non-image files in user messages
* fix: only render files if files.length is > 0
* refactor(config -> file-config): move file related configuration values to separate module, add excel types
* chore: spreadsheet file rendering
* fix(Landing): dark mode style for Assistant Name
* refactor: move progress incrementing to own hook, start smaller, cap near limit \(1\)
* refactor(useContentHandler): add empty Text part if last part was completed tool or image
* chore: add accordion trigger border styling for dark mode
* feat: Assistant Builder model selection
* chore: use Spinner when Assistant is mutating
* fix(get/assistants): return correct response object `AssistantListResponse`
* refactor(Spinner): pass size as prop
* refactor: make assistant crud mutations optimistic, add types for options
* chore: remove assistants route and view
* chore: move assistant builder components to separate directory
* feat(ContextButton): delete Assistant via context button/dialog, add localization
* refactor: conditionally show use and context menu buttons, add localization for create assistant
* feat: save side panel states to localStorage
* style(SidePanel): improve avatar menu and assistant select styling for dark mode
* refactor: make NavToggle reusable for either side (left or right), add SidePanel Toggle with ability to close it completely
* fix: resize handle and navToggle behavior
* fix(/avatar/:assistant_id): await `deleteFile` and assign unique name to uploaded image
* WIP: file UI components from PR #576
* refactor(OpenAIMinimalIcon): pass className
* feat: formatDate helper fn
* feat: DataTableColumnHeader
* feat: add row selection, formatted row values, number of rows selected
* WIP: add files to Side panel temporarily
* feat: `LB_QueueAsyncCall`: Leaky Bucket queue for external APIs, use in `processDeleteRequest`
* fix(TFile): correct `source` type with `FileSources`
* fix(useFileHandling): use `continue` instead of return when iterating multiple files, add file type to extendedFile
* chore: add generic setter type
* refactor(processDeleteRequest): settle promises to prevent rejections from processing deletions, log errors
* feat: `useFileDeletion` to reuse file deletion logic
* refactor(useFileDeletion): make `setFiles` an optional param and use object as param
* feat: useDeleteFilesFromTable
* feat: use real `files` data and add deletion action to data table
* fix(Table): make headers sticky
* feat: add dynamic filtering for columns; only show to user Host or OpenAI storage type
* style(DropdownMenu): replace `slate` with `gray`
* style(DataTable): apply dark mode themes and other misc styling
* style(Columns): add color to OpenAI Storage option
* refactor(FileContainer): make file preview reusable
* refactor(Images): make image preview reusable
* refactor(FilePreview): make file prop optional for FileIcon and FilePreview, fix relative style
* feat(Columns): add file/image previews, set a minimum size to show for file size in bytes
* WIP: File Panel with real files and formatted
* feat: open files dialog from panel
* style: file data table mobile and general column styling fixes
* refactor(api/files): return files sorted by the most recently updated
* refactor: provide fileMap through context to prevent re-selecting files to map in different areas; remove unused imports commented out in PanelColumns
* refactor(ExtendFile): make File type optional, add `attached` to prevent attached files from being deleted on remove, make Message.files a partial TFile type
* feat: attach files through file panel
* refactor(useFileHandling): move files to the start of cache list when uploaded
* refactor(useDeleteFilesMutation): delete files from cache when successfully deleted from server
* fix(FileRow): handle possible edge case of duplication due to attaching recently uploaded file
* style(SidePanel): make resize grip border transparent, remove unnecessary styling on close sidepanel button
* feat: action utilities and tests
* refactor(actions): add `ValidationResult` type and change wording for no server URL found
* refactor(actions): check for empty server URL
* fix(data-provider): revert tsconfig to fix type issue resolution
* feat(client): first pass of actions input for assistants
* refactor(FunctionSignature): change method to output object instead of string
* refactor(models/Assistant): add actions field to schema, use searchParams object for methods, and add `getAssistant`
* feat: post actions input first pass
- create new Action document
- add actions to Assistant DB document
- create /action/:assistant_id POST route
- pass more props down from PanelSwitcher, derive assistant_id from switcher
- move privacy policy to ActionInput
- reset data on input change/validation
- add `useUpdateAction`
- conform FunctionSignature type to FunctionTool
- add action, assistant doc, update hook related types
* refactor: optimize assistant/actions relationship
- past domain in metadata as hostname and not a URL
- include domain in tool name
- add `getActions` for actions retrieval by user
- add `getAssistants` for assistant docs retrieval by user
- add `assistant_id` to Action schema
- move actions to own module as a subroute to `api/assistants`
- add `useGetActionsQuery` and `useGetAssistantDocsQuery` hooks
- fix Action type def
* feat: show assistant actions in assistant builder
* feat: switch to actions on action click, editing action styling
* fix: add Assistant state for builder panel to allow immediate selection of newly created assistants as well as retaining the current assistant when switching to a different panel within the builder
* refactor(SidePanel/NavToggle): offset less from right when SidePanel is completely collapsed
* chore: rename `processActions` -> `processRequiredActions`
* chore: rename Assistant API Action to RequiredAction
* refactor(actions): avoid nesting actual API params under generic `requestBody` to optimize LLM token usage
* fix(handleTools): avoid calling `validTool` if not defined, add optional param to skip the loading of specs, which throws an error in the context of assistants
* WIP: working first pass of toolCalls generated from openapi specs
* WIP: first pass ToolCall styling
* feat: programmatic iv encryption/decryption helpers
* fix: correct ActionAuth types/enums, and define type for AuthForm
* feat: encryption/decryption helpers for Action AuthMetadata
* refactor(getActions): remove sensitive fields from query response
* refactor(POST/actions): encrypt and remove sensitive fields from mutation response
* fix(ActionService): change ESM import to CJS
* feat: frontend auth handling for actions + optimistic update on action update/creation
* refactor(actions): use the correct variables and types for setAuth method
* refactor: POST /:assistant_id action can now handle updating an existing action, add `saved_auth_fields` to determine when user explicitly saves new auth creds. only send auth metadata if user explicitly saved fields
* refactor(createActionTool): catch errors and send back meaningful error message, add flag to `getActions` to determine whether to retrieve sensitive values or not
* refactor(ToolService): add `action` property to ToolCall PartMetadata to determine if the tool call was an action, fix parsing function name issue with actionDelimiter
* fix(ActionRequest): use URL class to correctly join endpoint parts for `execute` call
* feat: delete assistant actions
* refactor: conditionally show Available actions
* refactor: show `retrieval` and `code_interpreter` as Capabilities, swap `Switch` for `Checkbox`
* chore: remove shadow-stroke from messages
* WIP: first pass of Assistants Knowledge attachments
* refactor: remove AssistantsProvider in favor of FormProvider, fix selectedAssistant re-render bug, map Assistant file_ids to files via fileMap, initialize Knowledge component with mapped files if any exist
* fix: prevent deleting files on assistant file upload
* chore: remove console.log
* refactor(useUploadFileMutation): update files and assistants cache on upload
* chore: disable oauth option as not supported yet
* feat: cancel assistant runs
* refactor: initialize OpenAI client with helper function, resolve all related circular dependencies
* fix(DALL-E): initialization
* fix(process): openai client initialization
* fix: select an existing Assistant when the active one is deleted
* chore: allow attaching files for assistant endpoint, send back relevant OpenAI error message when uploading, deconstruct openAI initialization correctly, add `message_file` to formData when a file is attached to the message but not the assistant
* fix: add assistant_id on newConvo
* fix(initializeClient): import fix
* chore: swap setAssistant for setOption in useEffect
* fix(DALL-E): add processFileURL to loadTools call
* chore: add customConfig to debug logs
* feat: delete threads on convo delete
* chore: replace Assistants icon
* chore: remove console.dir() in `abortRun`
* feat(AssistantService): accumulate text values from run in openai.responseText
* feat: titling for assistants endpoint
* chore: move panel file components to appropriate directory, add file checks for attaching files, change icon for Attach Files
* refactor: add localizations to tools, plugins, add condition for adding/remove user plugins so tool selections don't affect this value
* chore: disable `import from url` action for now
* chore: remove textMimeTypes from default fileConfig for now
* fix: catch tool errors and send as outputs with error messages
* fix: React warning about button as descendant of button
* style: retrieval and cancelled icon
* WIP: pass isSubmitting to Parts, use InProgressCall to display cancelled tool calls correctly, show domain/function name
* fix(meilisearch): fix `postSaveHook` issue where indexing expects a mongo document, and join all text content parts for meili indexing
* ci: fix dall-e tests
* ci: fix client tests
* fix: button types in actions panel
* fix: plugin auth form persisting across tool selections
* fix(ci): update AppService spec with `loadAndFormatTools`
* fix(clearConvos): add id check earlier on
* refactor(AssistantAvatar): set previewURL dynamically when emtadata.avatar changes
* feat(assistants): addTitle cache setting
* fix(useSSE): resolve rebase conflicts
* fix: delete mutation
* style(SidePanel): make grip visible on active and hover, invisible otherwise
* ci: add data-provider tests to workflow, also update eslint/tsconfig to recognize specs, and add `text/csv` to fileConfig
* fix: handle edge case where auth object is undefined, and log errors
* refactor(actions): resolve schemas, add tests for resolving refs, import specs from separate file for tests
* chore: remove comment
* fix(ActionsInput): re-render bug when initializing states with action fields
* fix(patch/assistant): filter undefined tools
* chore: add logging for errors in assistants routes
* fix(updateAssistant): map actions to functions to avoid overwriting
* fix(actions): properly handle GET paths
* fix(convos): unhandled delete thread exception
* refactor(AssistantService): pass both thread_id and conversationId when sending intermediate assistant messages, remove `mapMessagesToSteps` from AssistantService
* refactor(useSSE): replace all messages with runMessages and pass latestMessageId to abortRun; fix(checkMessageGaps): include tool calls when syncing messages
* refactor(assistants/chat): invoke `createOnTextProgress` after thread creation
* chore: add typing
* style: sidepanel styling
* style: action tool call domain styling
* feat(assistants): default models, limit retrieval to certain models, add env variables to to env.example
* feat: assistants api key in EndpointService
* refactor: set assistant model to conversation on assistant switch
* refactor: set assistant model to conversation on assistant select from panel
* fix(retrieveAndProcessFile): catch attempt to download file with `assistant` purpose which is not allowed; add logging
* feat: retrieval styling, handling, and logging
* chore: rename ASSISTANTS_REVERSE_PROXY to ASSISTANTS_BASE_URL
* feat: FileContext for file metadata
* feat: context file mgmt and filtering
* style(Select): hover/rounded changes
* refactor: explicit conversation switch, endpoint dependent, through `useSelectAssistant`, which does not create new chat if current endpoint is assistant endpoint
* fix(AssistantAvatar): make empty previewURL if no avatar present
* refactor: side panel mobile styling
* style: merge tool and action section, optimize mobile styling for action/tool buttons
* fix: localStorage issues
* fix(useSelectAssistant): invoke react query hook directly in select hook as Map was not being updated in time
* style: light mode fixes
* fix: prevent sidepanel nav styling from shifting layout up
* refactor: change default layout (collapsed by default)
* style: mobile optimization of DataTable
* style: datatable
* feat: client-side hide right-side panel
* chore(useNewConvo): add partial typing for preset
* fix(useSelectAssistant): pass correct model name by using template as preset
* WIP: assistant presets
* refactor(ToolService): add native solution for `TavilySearchResults` and log tool output errors
* refactor: organize imports and use native TavilySearchResults
* fix(TavilySearchResults): stringify result
* fix(ToolCall): show tool call outputs when not an action
* chore: rename Prompt Prefix to custom instructions (in user facing text only)
* refactor(EditPresetDialog): Optimize setting title by debouncing, reset preset on dialog close to avoid state mixture
* feat: add `presetOverride` to overwrite active conversation settings when saving a Preset (relevant for client side updates only)
* feat: Assistant preset settings (client-side)
* fix(Switcher): only set assistant_id and model if current endpoint is Assistants
* feat: use `useDebouncedInput` for updating conversation settings, starting with EditPresetDialog title setting and Assistant instructions setting
* feat(Assistants): add instructions field to settings
* feat(chat/assistants): pass conversation settings to run body
* wip: begin localization and only allow actions if the assistant is created
* refactor(AssistantsPanel): knowledge localization, allow tools on creation
* feat: experimental: allow 'priming' values before assistant is created, that would normally require an assistant_id to be defined
* chore: trim console logs and make more meaningful
* chore: toast messages
* fix(ci): date test
* feat: create file when uploading Assistant Avatar
* feat: file upload rate limiting from custom config with dynamic file route initialization
* refactor: use file upload limiters on post routes only
* refactor(fileConfig): add endpoints field for endpoint specific fileconfigs, add mergeConfig function, add tests
* refactor: fileConfig route, dynamic multer instances used on all '/' and '/images' POST routes, data service and query hook
* feat: supportedMimeTypesSchema, test for array of regex
* feat: configurable file config limits
* chore: clarify assistants file knowledge prereq.
* chore(useTextarea): default to localized 'Assistant' if assistant name is empty
* feat: configurable file limits and toggle file upload per endpoint
* fix(useUploadFileMutation): prevent updating assistant.files cache if file upload is a message_file attachment
* fix(AssistantSelect): set last selected assistant only when timeout successfully runs
* refactor(queries): disable assistant queries if assistants endpoint is not enabled
* chore(Switcher): add localization
* chore: pluralize `assistant` for `EModelEndpoint key and value
* feat: show/hide assistant UI components based on endpoint availability; librechat.yaml config for disabling builder section and setting polling/timeout intervals
* fix(compactEndpointSchemas): use EModelEndpoint for schema access
* feat(runAssistant): use configured values from `librechat.yaml` for `pollIntervalMs` and `timeout`
* fix: naming issue
* wip: revert landing
* 🎉 happy birthday LibreChat (#1768)
* happy birthday LibreChat
* Refactor endpoint condition in Landing component
* Update birthday message in Eng.tsx
* fix(/config): avoid nesting ternaries
* refactor(/config): check birthday
---------
Co-authored-by: Danny Avila <messagedaniel@protonmail.com>
* fix: landing
* fix: landing
* fix(useMessageHelpers): hardcoded check to use EModelEndpoint instead
* fix(ci): convo test revert to main
* fix(assistants/chat): fix issue where assistant_id was being saved as model for convo
* chore: added logging, promises racing to prevent longer timeouts, explicit setting of maxRetries and timeouts, robust catching of invalid abortRun params
* refactor: use recoil state for `showStopButton` and only show for assistants endpoint after syncing conversation data
* refactor: optimize abortRun strategy using localStorage, refactor `abortConversation` to use async/await and await the result, refactor how the abortKey cache is set for runs
* fix(checkMessageGaps): assign `assistant_id` to synced messages if defined; prevents UI from showing blank assistant for cancelled messages
* refactor: re-order sequence of chat route, only allow aborting messages after run is created, cancel abortRun if there was a cancelling error (likely due already cancelled in chat route), and add extra logging
* chore(typedefs): add httpAgent type to OpenAIClient
* refactor: use custom implementation of retrieving run with axios to allow for timing out run query
* fix(waitForRun): handle timed out run retrieval query
* refactor: update preset conditions:
- presets will retain settings when a different endpoint is selected; for existing convos, either when modular or is assistant switch
- no longer use `navigateToConvo` on preset select
* fix: temporary calculator hack as expects string input when invoked
* fix: cancel abortRun only when cancelling error is a result of the run already being cancelled
* chore: remove use of `fileMaxSizeMB` and total counterpart (redundant)
* docs: custom config documentation update
* docs: assistants api setup and dotenv, new custom config fields
* refactor(Switcher): make Assistant switcher sticky in SidePanel
* chore(useSSE): remove console log of data and message index
* refactor(AssistantPanel): button styling and add secondary select button to bottom of panel
* refactor(OpenAIClient): allow passing conversationId to RunManager through titleConvo and initializeLLM to properly record title context tokens used in cases where conversationId was not defined by the client
* feat(assistants): token tracking for assistant runs
* chore(spendTokens): improve logging
* feat: support/exclude specific assistant Ids
* chore: add update `librechat.example.yaml`, optimize `AppService` handling, new tests for `AppService`, optimize missing/outdate config logging
* chore: mount docker logs to root of project
* chore: condense axios errors
* chore: bump vite
* chore: vite hot reload fix using latest version
* chore(getOpenAIModels): sort instruct models to the end of models list
* fix(assistants): user provided key
* fix(assistants): user provided key, invalidate more queries on revoke
---------
Co-authored-by: Marco Beretta <81851188+Berry-13@users.noreply.github.com>
2024-02-13 20:42:27 -05:00
|
|
|
});
|
2024-08-24 04:36:08 -04:00
|
|
|
} else {
|
|
|
|
logger.debug('[spendTokens] No transactions incurred against balance');
|
|
|
|
}
|
feat: Accurate Token Usage Tracking & Optional Balance (#1018)
* refactor(Chains/llms): allow passing callbacks
* refactor(BaseClient): accurately count completion tokens as generation only
* refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM
* wip: summary prompt tokens
* refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end
wip: draft out relevant providers and variables for token tracing
* refactor(createLLM): make streaming prop false by default
* chore: remove use of getTokenCountForResponse
* refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace
* chore: remove passing of streaming prop, also console log useful vars for tracing
* feat: formatFromLangChain helper function to count tokens for ChatModelStart
* refactor(initializeLLM): add role for LLM tracing
* chore(formatFromLangChain): update JSDoc
* feat(formatMessages): formats langChain messages into OpenAI payload format
* chore: install openai-chat-tokens
* refactor(formatMessage): optimize conditional langChain logic
fix(formatFromLangChain): fix destructuring
* feat: accurate prompt tokens for ChatModelStart before generation
* refactor(handleChatModelStart): move to callbacks dir, use factory function
* refactor(initializeLLM): rename 'role' to 'context'
* feat(Balance/Transaction): new schema/models for tracking token spend
refactor(Key): factor out model export to separate file
* refactor(initializeClient): add req,res objects to client options
* feat: add-balance script to add to an existing users' token balance
refactor(Transaction): use multiplier map/function, return balance update
* refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match
* refactor(Tx): add fair fallback value multiplier incase the config result is undefined
* refactor(Balance): rename 'tokens' to 'tokenCredits'
* feat: balance check, add tx.js for new tx-related methods and tests
* chore(summaryPrompts): update prompt token count
* refactor(callbacks): pass req, res
wip: check balance
* refactor(Tx): make convoId a String type, fix(calculateTokenValue)
* refactor(BaseClient): add conversationId as client prop when assigned
* feat(RunManager): track LLM runs with manager, track token spend from LLM,
refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls
* feat(spendTokens): helper to spend prompt/completion tokens
* feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds
refactor(Balance): static check method to return object instead of boolean now
wip(OpenAIClient): implement use of checkBalance
* refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large
refactor(OpenAIClient): add checkBalance
refactor(createStartHandler): add checkBalance
* chore: remove prompt and completion token logging from route handler
* chore(spendTokens): add JSDoc
* feat(logTokenCost): record transactions for basic api calls
* chore(ask/edit): invoke getResponseSender only once per API call
* refactor(ask/edit): pass promptTokens to getIds and include in abort data
* refactor(getIds -> getReqData): rename function
* refactor(Tx): increase value if incomplete message
* feat: record tokenUsage when message is aborted
* refactor: subtract tokens when payload includes function_call
* refactor: add namespace for token_balance
* fix(spendTokens): only execute if corresponding token type amounts are defined
* refactor(checkBalance): throws Error if not enough token credits
* refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run'
* fix(abortMiddleware): circular dependency, and default to empty string for completionTokens
* fix: properly cancel title requests when there isn't enough tokens to generate
* feat(predictNewSummary): custom chain for summaries to allow signal passing
refactor(summaryBuffer): use new custom chain
* feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError
* refactor(createStartHandler): if summary, add error details to runs
* fix(OpenAIClient): support aborting from summarization & showing error to user
refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer
* refactor(logTokenCost -> recordTokenUsage): rename
* refactor(checkBalance): include promptTokens in errorMessage
* refactor(checkBalance/spendTokens): move to models dir
* fix(createLanguageChain): correctly pass config
* refactor(initializeLLM/title): add tokenBuffer of 150 for balance check
* refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called
* refactor(createStartHandler): add error to run if context is plugins as well
* refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run
* refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic
* chore: use absolute equality for addTitle condition
* refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional
* style: icon changes to match official
* fix(BaseClient): getTokenCountForResponse -> getTokenCount
* fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc
* refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled
* fix(e2e/cleanUp): cleanup new collections, import all model methods from index
* fix(config/add-balance): add uncaughtException listener
* fix: circular dependency
* refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance
* fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped
* fix(createStartHandler): correct condition for generations
* chore: bump postcss due to moderate severity vulnerability
* chore: bump zod due to low severity vulnerability
* chore: bump openai & data-provider version
* feat(types): OpenAI Message types
* chore: update bun lockfile
* refactor(CodeBlock): add error block formatting
* refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON
* chore(logViolation): delete user_id after error is logged
* refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex
* fix(DALL-E): use latest openai SDK
* chore: reorganize imports, fix type issue
* feat(server): add balance route
* fix(api/models): add auth
* feat(data-provider): /api/balance query
* feat: show balance if checking is enabled, refetch on final message or error
* chore: update docs, .env.example with token_usage info, add balance script command
* fix(Balance): fallback to empty obj for balance query
* style: slight adjustment of balance element
* docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
|
|
|
} catch (err) {
|
2023-12-14 07:49:27 -05:00
|
|
|
logger.error('[spendTokens]', err);
|
feat: Accurate Token Usage Tracking & Optional Balance (#1018)
* refactor(Chains/llms): allow passing callbacks
* refactor(BaseClient): accurately count completion tokens as generation only
* refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM
* wip: summary prompt tokens
* refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end
wip: draft out relevant providers and variables for token tracing
* refactor(createLLM): make streaming prop false by default
* chore: remove use of getTokenCountForResponse
* refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace
* chore: remove passing of streaming prop, also console log useful vars for tracing
* feat: formatFromLangChain helper function to count tokens for ChatModelStart
* refactor(initializeLLM): add role for LLM tracing
* chore(formatFromLangChain): update JSDoc
* feat(formatMessages): formats langChain messages into OpenAI payload format
* chore: install openai-chat-tokens
* refactor(formatMessage): optimize conditional langChain logic
fix(formatFromLangChain): fix destructuring
* feat: accurate prompt tokens for ChatModelStart before generation
* refactor(handleChatModelStart): move to callbacks dir, use factory function
* refactor(initializeLLM): rename 'role' to 'context'
* feat(Balance/Transaction): new schema/models for tracking token spend
refactor(Key): factor out model export to separate file
* refactor(initializeClient): add req,res objects to client options
* feat: add-balance script to add to an existing users' token balance
refactor(Transaction): use multiplier map/function, return balance update
* refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match
* refactor(Tx): add fair fallback value multiplier incase the config result is undefined
* refactor(Balance): rename 'tokens' to 'tokenCredits'
* feat: balance check, add tx.js for new tx-related methods and tests
* chore(summaryPrompts): update prompt token count
* refactor(callbacks): pass req, res
wip: check balance
* refactor(Tx): make convoId a String type, fix(calculateTokenValue)
* refactor(BaseClient): add conversationId as client prop when assigned
* feat(RunManager): track LLM runs with manager, track token spend from LLM,
refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls
* feat(spendTokens): helper to spend prompt/completion tokens
* feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds
refactor(Balance): static check method to return object instead of boolean now
wip(OpenAIClient): implement use of checkBalance
* refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large
refactor(OpenAIClient): add checkBalance
refactor(createStartHandler): add checkBalance
* chore: remove prompt and completion token logging from route handler
* chore(spendTokens): add JSDoc
* feat(logTokenCost): record transactions for basic api calls
* chore(ask/edit): invoke getResponseSender only once per API call
* refactor(ask/edit): pass promptTokens to getIds and include in abort data
* refactor(getIds -> getReqData): rename function
* refactor(Tx): increase value if incomplete message
* feat: record tokenUsage when message is aborted
* refactor: subtract tokens when payload includes function_call
* refactor: add namespace for token_balance
* fix(spendTokens): only execute if corresponding token type amounts are defined
* refactor(checkBalance): throws Error if not enough token credits
* refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run'
* fix(abortMiddleware): circular dependency, and default to empty string for completionTokens
* fix: properly cancel title requests when there isn't enough tokens to generate
* feat(predictNewSummary): custom chain for summaries to allow signal passing
refactor(summaryBuffer): use new custom chain
* feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError
* refactor(createStartHandler): if summary, add error details to runs
* fix(OpenAIClient): support aborting from summarization & showing error to user
refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer
* refactor(logTokenCost -> recordTokenUsage): rename
* refactor(checkBalance): include promptTokens in errorMessage
* refactor(checkBalance/spendTokens): move to models dir
* fix(createLanguageChain): correctly pass config
* refactor(initializeLLM/title): add tokenBuffer of 150 for balance check
* refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called
* refactor(createStartHandler): add error to run if context is plugins as well
* refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run
* refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic
* chore: use absolute equality for addTitle condition
* refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional
* style: icon changes to match official
* fix(BaseClient): getTokenCountForResponse -> getTokenCount
* fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc
* refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled
* fix(e2e/cleanUp): cleanup new collections, import all model methods from index
* fix(config/add-balance): add uncaughtException listener
* fix: circular dependency
* refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance
* fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped
* fix(createStartHandler): correct condition for generations
* chore: bump postcss due to moderate severity vulnerability
* chore: bump zod due to low severity vulnerability
* chore: bump openai & data-provider version
* feat(types): OpenAI Message types
* chore: update bun lockfile
* refactor(CodeBlock): add error block formatting
* refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON
* chore(logViolation): delete user_id after error is logged
* refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex
* fix(DALL-E): use latest openai SDK
* chore: reorganize imports, fix type issue
* feat(server): add balance route
* fix(api/models): add auth
* feat(data-provider): /api/balance query
* feat: show balance if checking is enabled, refetch on final message or error
* chore: update docs, .env.example with token_usage info, add balance script command
* fix(Balance): fallback to empty obj for balance query
* style: slight adjustment of balance element
* docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2024-08-17 03:24:09 -04:00
|
|
|
/**
|
|
|
|
* Creates transactions to record the spending of structured tokens.
|
|
|
|
*
|
|
|
|
* @function
|
|
|
|
* @async
|
2025-08-26 12:10:18 -04:00
|
|
|
* @param {txData} txData - Transaction data.
|
2024-08-17 03:24:09 -04:00
|
|
|
* @param {Object} tokenUsage - The number of tokens used.
|
|
|
|
* @param {Object} tokenUsage.promptTokens - The number of prompt tokens used.
|
|
|
|
* @param {Number} tokenUsage.promptTokens.input - The number of input tokens.
|
|
|
|
* @param {Number} tokenUsage.promptTokens.write - The number of write tokens.
|
|
|
|
* @param {Number} tokenUsage.promptTokens.read - The number of read tokens.
|
|
|
|
* @param {Number} tokenUsage.completionTokens - The number of completion tokens used.
|
|
|
|
* @returns {Promise<void>} - Returns nothing.
|
|
|
|
* @throws {Error} - Throws an error if there's an issue creating the transactions.
|
|
|
|
*/
|
|
|
|
const spendStructuredTokens = async (txData, tokenUsage) => {
|
|
|
|
const { promptTokens, completionTokens } = tokenUsage;
|
|
|
|
logger.debug(
|
|
|
|
`[spendStructuredTokens] conversationId: ${txData.conversationId}${
|
|
|
|
txData?.context ? ` | Context: ${txData?.context}` : ''
|
|
|
|
} | Token usage: `,
|
|
|
|
{
|
|
|
|
promptTokens,
|
|
|
|
completionTokens,
|
|
|
|
},
|
|
|
|
);
|
|
|
|
let prompt, completion;
|
|
|
|
try {
|
|
|
|
if (promptTokens) {
|
|
|
|
const { input = 0, write = 0, read = 0 } = promptTokens;
|
2025-05-30 22:18:13 -04:00
|
|
|
prompt = await createStructuredTransaction({
|
2024-08-17 03:24:09 -04:00
|
|
|
...txData,
|
|
|
|
tokenType: 'prompt',
|
2024-08-24 04:36:08 -04:00
|
|
|
inputTokens: -input,
|
|
|
|
writeTokens: -write,
|
|
|
|
readTokens: -read,
|
2024-08-17 03:24:09 -04:00
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
if (completionTokens) {
|
2025-05-30 22:18:13 -04:00
|
|
|
completion = await createTransaction({
|
2024-08-17 03:24:09 -04:00
|
|
|
...txData,
|
|
|
|
tokenType: 'completion',
|
|
|
|
rawAmount: -completionTokens,
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2024-08-24 04:36:08 -04:00
|
|
|
if (prompt || completion) {
|
2024-08-17 03:24:09 -04:00
|
|
|
logger.debug('[spendStructuredTokens] Transaction data record against balance:', {
|
|
|
|
user: txData.user,
|
2024-08-24 04:36:08 -04:00
|
|
|
prompt: prompt?.prompt,
|
|
|
|
promptRate: prompt?.rate,
|
|
|
|
completion: completion?.completion,
|
|
|
|
completionRate: completion?.rate,
|
|
|
|
balance: completion?.balance ?? prompt?.balance,
|
2024-08-17 03:24:09 -04:00
|
|
|
});
|
2024-08-24 04:36:08 -04:00
|
|
|
} else {
|
|
|
|
logger.debug('[spendStructuredTokens] No transactions incurred against balance');
|
|
|
|
}
|
2024-08-17 03:24:09 -04:00
|
|
|
} catch (err) {
|
|
|
|
logger.error('[spendStructuredTokens]', err);
|
|
|
|
}
|
2024-08-24 04:36:08 -04:00
|
|
|
|
|
|
|
return { prompt, completion };
|
2024-08-17 03:24:09 -04:00
|
|
|
};
|
|
|
|
|
|
|
|
module.exports = { spendTokens, spendStructuredTokens };
|