2024-01-30 14:34:02 -05:00
// file deepcode ignore HardcodedNonCryptoSecret: No hardcoded secrets
2024-04-21 08:31:54 -04:00
import { ViolationTypes , ErrorTypes } from 'librechat-data-provider' ;
feat: Accurate Token Usage Tracking & Optional Balance (#1018)
* refactor(Chains/llms): allow passing callbacks
* refactor(BaseClient): accurately count completion tokens as generation only
* refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM
* wip: summary prompt tokens
* refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end
wip: draft out relevant providers and variables for token tracing
* refactor(createLLM): make streaming prop false by default
* chore: remove use of getTokenCountForResponse
* refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace
* chore: remove passing of streaming prop, also console log useful vars for tracing
* feat: formatFromLangChain helper function to count tokens for ChatModelStart
* refactor(initializeLLM): add role for LLM tracing
* chore(formatFromLangChain): update JSDoc
* feat(formatMessages): formats langChain messages into OpenAI payload format
* chore: install openai-chat-tokens
* refactor(formatMessage): optimize conditional langChain logic
fix(formatFromLangChain): fix destructuring
* feat: accurate prompt tokens for ChatModelStart before generation
* refactor(handleChatModelStart): move to callbacks dir, use factory function
* refactor(initializeLLM): rename 'role' to 'context'
* feat(Balance/Transaction): new schema/models for tracking token spend
refactor(Key): factor out model export to separate file
* refactor(initializeClient): add req,res objects to client options
* feat: add-balance script to add to an existing users' token balance
refactor(Transaction): use multiplier map/function, return balance update
* refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match
* refactor(Tx): add fair fallback value multiplier incase the config result is undefined
* refactor(Balance): rename 'tokens' to 'tokenCredits'
* feat: balance check, add tx.js for new tx-related methods and tests
* chore(summaryPrompts): update prompt token count
* refactor(callbacks): pass req, res
wip: check balance
* refactor(Tx): make convoId a String type, fix(calculateTokenValue)
* refactor(BaseClient): add conversationId as client prop when assigned
* feat(RunManager): track LLM runs with manager, track token spend from LLM,
refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls
* feat(spendTokens): helper to spend prompt/completion tokens
* feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds
refactor(Balance): static check method to return object instead of boolean now
wip(OpenAIClient): implement use of checkBalance
* refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large
refactor(OpenAIClient): add checkBalance
refactor(createStartHandler): add checkBalance
* chore: remove prompt and completion token logging from route handler
* chore(spendTokens): add JSDoc
* feat(logTokenCost): record transactions for basic api calls
* chore(ask/edit): invoke getResponseSender only once per API call
* refactor(ask/edit): pass promptTokens to getIds and include in abort data
* refactor(getIds -> getReqData): rename function
* refactor(Tx): increase value if incomplete message
* feat: record tokenUsage when message is aborted
* refactor: subtract tokens when payload includes function_call
* refactor: add namespace for token_balance
* fix(spendTokens): only execute if corresponding token type amounts are defined
* refactor(checkBalance): throws Error if not enough token credits
* refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run'
* fix(abortMiddleware): circular dependency, and default to empty string for completionTokens
* fix: properly cancel title requests when there isn't enough tokens to generate
* feat(predictNewSummary): custom chain for summaries to allow signal passing
refactor(summaryBuffer): use new custom chain
* feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError
* refactor(createStartHandler): if summary, add error details to runs
* fix(OpenAIClient): support aborting from summarization & showing error to user
refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer
* refactor(logTokenCost -> recordTokenUsage): rename
* refactor(checkBalance): include promptTokens in errorMessage
* refactor(checkBalance/spendTokens): move to models dir
* fix(createLanguageChain): correctly pass config
* refactor(initializeLLM/title): add tokenBuffer of 150 for balance check
* refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called
* refactor(createStartHandler): add error to run if context is plugins as well
* refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run
* refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic
* chore: use absolute equality for addTitle condition
* refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional
* style: icon changes to match official
* fix(BaseClient): getTokenCountForResponse -> getTokenCount
* fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc
* refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled
* fix(e2e/cleanUp): cleanup new collections, import all model methods from index
* fix(config/add-balance): add uncaughtException listener
* fix: circular dependency
* refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance
* fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped
* fix(createStartHandler): correct condition for generations
* chore: bump postcss due to moderate severity vulnerability
* chore: bump zod due to low severity vulnerability
* chore: bump openai & data-provider version
* feat(types): OpenAI Message types
* chore: update bun lockfile
* refactor(CodeBlock): add error block formatting
* refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON
* chore(logViolation): delete user_id after error is logged
* refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex
* fix(DALL-E): use latest openai SDK
* chore: reorganize imports, fix type issue
* feat(server): add balance route
* fix(api/models): add auth
* feat(data-provider): /api/balance query
* feat: show balance if checking is enabled, refetch on final message or error
* chore: update docs, .env.example with token_usage info, add balance script command
* fix(Balance): fallback to empty obj for balance query
* style: slight adjustment of balance element
* docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
import type { TOpenAIMessage } from 'librechat-data-provider' ;
2024-04-21 08:31:54 -04:00
import type { LocalizeFunction } from '~/common' ;
2023-12-15 02:18:07 -05:00
import { formatJSON , extractJson , isJson } from '~/utils/json' ;
2024-04-21 08:31:54 -04:00
import useLocalize from '~/hooks/useLocalize' ;
feat: Accurate Token Usage Tracking & Optional Balance (#1018)
* refactor(Chains/llms): allow passing callbacks
* refactor(BaseClient): accurately count completion tokens as generation only
* refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM
* wip: summary prompt tokens
* refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end
wip: draft out relevant providers and variables for token tracing
* refactor(createLLM): make streaming prop false by default
* chore: remove use of getTokenCountForResponse
* refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace
* chore: remove passing of streaming prop, also console log useful vars for tracing
* feat: formatFromLangChain helper function to count tokens for ChatModelStart
* refactor(initializeLLM): add role for LLM tracing
* chore(formatFromLangChain): update JSDoc
* feat(formatMessages): formats langChain messages into OpenAI payload format
* chore: install openai-chat-tokens
* refactor(formatMessage): optimize conditional langChain logic
fix(formatFromLangChain): fix destructuring
* feat: accurate prompt tokens for ChatModelStart before generation
* refactor(handleChatModelStart): move to callbacks dir, use factory function
* refactor(initializeLLM): rename 'role' to 'context'
* feat(Balance/Transaction): new schema/models for tracking token spend
refactor(Key): factor out model export to separate file
* refactor(initializeClient): add req,res objects to client options
* feat: add-balance script to add to an existing users' token balance
refactor(Transaction): use multiplier map/function, return balance update
* refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match
* refactor(Tx): add fair fallback value multiplier incase the config result is undefined
* refactor(Balance): rename 'tokens' to 'tokenCredits'
* feat: balance check, add tx.js for new tx-related methods and tests
* chore(summaryPrompts): update prompt token count
* refactor(callbacks): pass req, res
wip: check balance
* refactor(Tx): make convoId a String type, fix(calculateTokenValue)
* refactor(BaseClient): add conversationId as client prop when assigned
* feat(RunManager): track LLM runs with manager, track token spend from LLM,
refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls
* feat(spendTokens): helper to spend prompt/completion tokens
* feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds
refactor(Balance): static check method to return object instead of boolean now
wip(OpenAIClient): implement use of checkBalance
* refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large
refactor(OpenAIClient): add checkBalance
refactor(createStartHandler): add checkBalance
* chore: remove prompt and completion token logging from route handler
* chore(spendTokens): add JSDoc
* feat(logTokenCost): record transactions for basic api calls
* chore(ask/edit): invoke getResponseSender only once per API call
* refactor(ask/edit): pass promptTokens to getIds and include in abort data
* refactor(getIds -> getReqData): rename function
* refactor(Tx): increase value if incomplete message
* feat: record tokenUsage when message is aborted
* refactor: subtract tokens when payload includes function_call
* refactor: add namespace for token_balance
* fix(spendTokens): only execute if corresponding token type amounts are defined
* refactor(checkBalance): throws Error if not enough token credits
* refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run'
* fix(abortMiddleware): circular dependency, and default to empty string for completionTokens
* fix: properly cancel title requests when there isn't enough tokens to generate
* feat(predictNewSummary): custom chain for summaries to allow signal passing
refactor(summaryBuffer): use new custom chain
* feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError
* refactor(createStartHandler): if summary, add error details to runs
* fix(OpenAIClient): support aborting from summarization & showing error to user
refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer
* refactor(logTokenCost -> recordTokenUsage): rename
* refactor(checkBalance): include promptTokens in errorMessage
* refactor(checkBalance/spendTokens): move to models dir
* fix(createLanguageChain): correctly pass config
* refactor(initializeLLM/title): add tokenBuffer of 150 for balance check
* refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called
* refactor(createStartHandler): add error to run if context is plugins as well
* refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run
* refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic
* chore: use absolute equality for addTitle condition
* refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional
* style: icon changes to match official
* fix(BaseClient): getTokenCountForResponse -> getTokenCount
* fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc
* refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled
* fix(e2e/cleanUp): cleanup new collections, import all model methods from index
* fix(config/add-balance): add uncaughtException listener
* fix: circular dependency
* refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance
* fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped
* fix(createStartHandler): correct condition for generations
* chore: bump postcss due to moderate severity vulnerability
* chore: bump zod due to low severity vulnerability
* chore: bump openai & data-provider version
* feat(types): OpenAI Message types
* chore: update bun lockfile
* refactor(CodeBlock): add error block formatting
* refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON
* chore(logViolation): delete user_id after error is logged
* refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex
* fix(DALL-E): use latest openai SDK
* chore: reorganize imports, fix type issue
* feat(server): add balance route
* fix(api/models): add auth
* feat(data-provider): /api/balance query
* feat: show balance if checking is enabled, refetch on final message or error
* chore: update docs, .env.example with token_usage info, add balance script command
* fix(Balance): fallback to empty obj for balance query
* style: slight adjustment of balance element
* docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
import CodeBlock from './CodeBlock' ;
2024-04-21 08:31:54 -04:00
const localizedErrorPrefix = 'com_error' ;
feat: Message Rate Limiters, Violation Logging, & Ban System 🔨 (#903)
* refactor: require Auth middleware in route index files
* feat: concurrent message limiter
* feat: complete concurrent message limiter with caching
* refactor: SSE response methods separated from handleText
* fix(abortMiddleware): fix req and res order to standard, use endpointOption in req.body
* chore: minor name changes
* refactor: add isUUID condition to saveMessage
* fix(concurrentLimiter): logic correctly handles the max number of concurrent messages and res closing/finalization
* chore: bump keyv and remove console.log from Message
* fix(concurrentLimiter): ensure messages are only saved in later message children
* refactor(concurrentLimiter): use KeyvFile instead, could make other stores configurable in the future
* feat: add denyRequest function for error responses
* feat(utils): add isStringTruthy function
Introduce the isStringTruthy function to the utilities module to check if a string value is a case-insensitive match for 'true'
* feat: add optional message rate limiters by IP and userId
* feat: add optional message rate limiters by IP and userId to edit route
* refactor: rename isStringTruthy to isTrue for brevity
* refactor(getError): use map to make code cleaner
* refactor: use memory for concurrent rate limiter to prevent clearing on startup/exit, add multiple log files, fix error message for concurrent violation
* feat: check if errorMessage is object, stringify if so
* chore: send object to denyRequest which will stringify it
* feat: log excessive requests
* fix(getError): correctly pluralize messages
* refactor(limiters): make type consistent between logs and errorMessage
* refactor(cache): move files out of lib/db into separate cache dir
>> feat: add getLogStores function so Keyv instance is not redundantly created on every violation
feat: separate violation logging to own function with logViolation
* fix: cache/index.js export, properly record userViolations
* refactor(messageLimiters): use new logging method, add logging to registrations
* refactor(logViolation): make userLogs an array of logs per user
* feat: add logging to login limiter
* refactor: pass req as first param to logViolation and record offending IP
* refactor: rename isTrue helper fn to isEnabled
* feat: add simple non_browser check and log violation
* fix: open handles in unit tests, remove KeyvMongo as not used and properly mock global fetch
* chore: adjust nodemon ignore paths to properly ignore logs
* feat: add math helper function for safe use of eval
* refactor(api/convos): use middleware at top of file to avoid redundancy
* feat: add delete all static method for Sessions
* fix: redirect to login on refresh if user is not found, or the session is not found but hasn't expired (ban case)
* refactor(getLogStores): adjust return type
* feat: add ban violation and check ban logic
refactor(logViolation): pass both req and res objects
* feat: add removePorts helper function
* refactor: rename getError to getMessageError and add getLoginError for displaying different login errors
* fix(AuthContext): fix type issue and remove unused code
* refactor(bans): ban by ip and user id, send response based on origin
* chore: add frontend ban messages
* refactor(routes/oauth): add ban check to handler, also consolidate logic to avoid redundancy
* feat: add ban check to AI messaging routes
* feat: add ban check to login/registration
* fix(ci/api): mock KeyvMongo to avoid tests hanging
* docs: update .env.example
> refactor(banViolation): calculate interval rate crossover, early return if duration is invalid
ci(banViolation): add tests to ensure users are only banned when expected
* docs: improve wording for mod system
* feat: add configurable env variables for violation scores
* chore: add jsdoc for uaParser.js
* chore: improve ban text log
* chore: update bun test scripts
* refactor(math.js): add fallback values
* fix(KeyvMongo/banLogs): refactor keyv instances to top of files to avoid memory leaks, refactor ban logic to use getLogStores instead
refactor(getLogStores): get a single log store by type
* fix(ci): refactor tests due to banLogs changes, also make sure to clear and revoke sessions even if ban duration is 0
* fix(banViolation.js): getLogStores import
* feat: handle 500 code error at login
* fix(middleware): handle case where user.id is _id and not just id
* ci: add ban secrets for backend unit tests
* refactor: logout user upon ban
* chore: log session delete message only if deletedCount > 0
* refactor: change default ban duration (2h) and make logic more clear in JSDOC
* fix: login and registration limiters will now return rate limiting error
* fix: userId not parsable as non ObjectId string
* feat: add useTimeout hook to properly clear timeouts when invoking functions within them
refactor(AuthContext): cleanup code by using new hook and defining types in ~/common
* fix: login error message for rate limits
* docs: add info for automated mod system and rate limiters, update other docs accordingly
* chore: bump data-provider version
2023-09-13 10:57:07 -04:00
type TConcurrent = {
limit : number ;
} ;
type TMessageLimit = {
max : number ;
windowInMinutes : number ;
} ;
feat: Accurate Token Usage Tracking & Optional Balance (#1018)
* refactor(Chains/llms): allow passing callbacks
* refactor(BaseClient): accurately count completion tokens as generation only
* refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM
* wip: summary prompt tokens
* refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end
wip: draft out relevant providers and variables for token tracing
* refactor(createLLM): make streaming prop false by default
* chore: remove use of getTokenCountForResponse
* refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace
* chore: remove passing of streaming prop, also console log useful vars for tracing
* feat: formatFromLangChain helper function to count tokens for ChatModelStart
* refactor(initializeLLM): add role for LLM tracing
* chore(formatFromLangChain): update JSDoc
* feat(formatMessages): formats langChain messages into OpenAI payload format
* chore: install openai-chat-tokens
* refactor(formatMessage): optimize conditional langChain logic
fix(formatFromLangChain): fix destructuring
* feat: accurate prompt tokens for ChatModelStart before generation
* refactor(handleChatModelStart): move to callbacks dir, use factory function
* refactor(initializeLLM): rename 'role' to 'context'
* feat(Balance/Transaction): new schema/models for tracking token spend
refactor(Key): factor out model export to separate file
* refactor(initializeClient): add req,res objects to client options
* feat: add-balance script to add to an existing users' token balance
refactor(Transaction): use multiplier map/function, return balance update
* refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match
* refactor(Tx): add fair fallback value multiplier incase the config result is undefined
* refactor(Balance): rename 'tokens' to 'tokenCredits'
* feat: balance check, add tx.js for new tx-related methods and tests
* chore(summaryPrompts): update prompt token count
* refactor(callbacks): pass req, res
wip: check balance
* refactor(Tx): make convoId a String type, fix(calculateTokenValue)
* refactor(BaseClient): add conversationId as client prop when assigned
* feat(RunManager): track LLM runs with manager, track token spend from LLM,
refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls
* feat(spendTokens): helper to spend prompt/completion tokens
* feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds
refactor(Balance): static check method to return object instead of boolean now
wip(OpenAIClient): implement use of checkBalance
* refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large
refactor(OpenAIClient): add checkBalance
refactor(createStartHandler): add checkBalance
* chore: remove prompt and completion token logging from route handler
* chore(spendTokens): add JSDoc
* feat(logTokenCost): record transactions for basic api calls
* chore(ask/edit): invoke getResponseSender only once per API call
* refactor(ask/edit): pass promptTokens to getIds and include in abort data
* refactor(getIds -> getReqData): rename function
* refactor(Tx): increase value if incomplete message
* feat: record tokenUsage when message is aborted
* refactor: subtract tokens when payload includes function_call
* refactor: add namespace for token_balance
* fix(spendTokens): only execute if corresponding token type amounts are defined
* refactor(checkBalance): throws Error if not enough token credits
* refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run'
* fix(abortMiddleware): circular dependency, and default to empty string for completionTokens
* fix: properly cancel title requests when there isn't enough tokens to generate
* feat(predictNewSummary): custom chain for summaries to allow signal passing
refactor(summaryBuffer): use new custom chain
* feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError
* refactor(createStartHandler): if summary, add error details to runs
* fix(OpenAIClient): support aborting from summarization & showing error to user
refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer
* refactor(logTokenCost -> recordTokenUsage): rename
* refactor(checkBalance): include promptTokens in errorMessage
* refactor(checkBalance/spendTokens): move to models dir
* fix(createLanguageChain): correctly pass config
* refactor(initializeLLM/title): add tokenBuffer of 150 for balance check
* refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called
* refactor(createStartHandler): add error to run if context is plugins as well
* refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run
* refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic
* chore: use absolute equality for addTitle condition
* refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional
* style: icon changes to match official
* fix(BaseClient): getTokenCountForResponse -> getTokenCount
* fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc
* refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled
* fix(e2e/cleanUp): cleanup new collections, import all model methods from index
* fix(config/add-balance): add uncaughtException listener
* fix: circular dependency
* refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance
* fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped
* fix(createStartHandler): correct condition for generations
* chore: bump postcss due to moderate severity vulnerability
* chore: bump zod due to low severity vulnerability
* chore: bump openai & data-provider version
* feat(types): OpenAI Message types
* chore: update bun lockfile
* refactor(CodeBlock): add error block formatting
* refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON
* chore(logViolation): delete user_id after error is logged
* refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex
* fix(DALL-E): use latest openai SDK
* chore: reorganize imports, fix type issue
* feat(server): add balance route
* fix(api/models): add auth
* feat(data-provider): /api/balance query
* feat: show balance if checking is enabled, refetch on final message or error
* chore: update docs, .env.example with token_usage info, add balance script command
* fix(Balance): fallback to empty obj for balance query
* style: slight adjustment of balance element
* docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
type TTokenBalance = {
2024-04-21 08:31:54 -04:00
type : ViolationTypes | ErrorTypes ;
feat: Accurate Token Usage Tracking & Optional Balance (#1018)
* refactor(Chains/llms): allow passing callbacks
* refactor(BaseClient): accurately count completion tokens as generation only
* refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM
* wip: summary prompt tokens
* refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end
wip: draft out relevant providers and variables for token tracing
* refactor(createLLM): make streaming prop false by default
* chore: remove use of getTokenCountForResponse
* refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace
* chore: remove passing of streaming prop, also console log useful vars for tracing
* feat: formatFromLangChain helper function to count tokens for ChatModelStart
* refactor(initializeLLM): add role for LLM tracing
* chore(formatFromLangChain): update JSDoc
* feat(formatMessages): formats langChain messages into OpenAI payload format
* chore: install openai-chat-tokens
* refactor(formatMessage): optimize conditional langChain logic
fix(formatFromLangChain): fix destructuring
* feat: accurate prompt tokens for ChatModelStart before generation
* refactor(handleChatModelStart): move to callbacks dir, use factory function
* refactor(initializeLLM): rename 'role' to 'context'
* feat(Balance/Transaction): new schema/models for tracking token spend
refactor(Key): factor out model export to separate file
* refactor(initializeClient): add req,res objects to client options
* feat: add-balance script to add to an existing users' token balance
refactor(Transaction): use multiplier map/function, return balance update
* refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match
* refactor(Tx): add fair fallback value multiplier incase the config result is undefined
* refactor(Balance): rename 'tokens' to 'tokenCredits'
* feat: balance check, add tx.js for new tx-related methods and tests
* chore(summaryPrompts): update prompt token count
* refactor(callbacks): pass req, res
wip: check balance
* refactor(Tx): make convoId a String type, fix(calculateTokenValue)
* refactor(BaseClient): add conversationId as client prop when assigned
* feat(RunManager): track LLM runs with manager, track token spend from LLM,
refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls
* feat(spendTokens): helper to spend prompt/completion tokens
* feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds
refactor(Balance): static check method to return object instead of boolean now
wip(OpenAIClient): implement use of checkBalance
* refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large
refactor(OpenAIClient): add checkBalance
refactor(createStartHandler): add checkBalance
* chore: remove prompt and completion token logging from route handler
* chore(spendTokens): add JSDoc
* feat(logTokenCost): record transactions for basic api calls
* chore(ask/edit): invoke getResponseSender only once per API call
* refactor(ask/edit): pass promptTokens to getIds and include in abort data
* refactor(getIds -> getReqData): rename function
* refactor(Tx): increase value if incomplete message
* feat: record tokenUsage when message is aborted
* refactor: subtract tokens when payload includes function_call
* refactor: add namespace for token_balance
* fix(spendTokens): only execute if corresponding token type amounts are defined
* refactor(checkBalance): throws Error if not enough token credits
* refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run'
* fix(abortMiddleware): circular dependency, and default to empty string for completionTokens
* fix: properly cancel title requests when there isn't enough tokens to generate
* feat(predictNewSummary): custom chain for summaries to allow signal passing
refactor(summaryBuffer): use new custom chain
* feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError
* refactor(createStartHandler): if summary, add error details to runs
* fix(OpenAIClient): support aborting from summarization & showing error to user
refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer
* refactor(logTokenCost -> recordTokenUsage): rename
* refactor(checkBalance): include promptTokens in errorMessage
* refactor(checkBalance/spendTokens): move to models dir
* fix(createLanguageChain): correctly pass config
* refactor(initializeLLM/title): add tokenBuffer of 150 for balance check
* refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called
* refactor(createStartHandler): add error to run if context is plugins as well
* refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run
* refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic
* chore: use absolute equality for addTitle condition
* refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional
* style: icon changes to match official
* fix(BaseClient): getTokenCountForResponse -> getTokenCount
* fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc
* refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled
* fix(e2e/cleanUp): cleanup new collections, import all model methods from index
* fix(config/add-balance): add uncaughtException listener
* fix: circular dependency
* refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance
* fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped
* fix(createStartHandler): correct condition for generations
* chore: bump postcss due to moderate severity vulnerability
* chore: bump zod due to low severity vulnerability
* chore: bump openai & data-provider version
* feat(types): OpenAI Message types
* chore: update bun lockfile
* refactor(CodeBlock): add error block formatting
* refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON
* chore(logViolation): delete user_id after error is logged
* refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex
* fix(DALL-E): use latest openai SDK
* chore: reorganize imports, fix type issue
* feat(server): add balance route
* fix(api/models): add auth
* feat(data-provider): /api/balance query
* feat: show balance if checking is enabled, refetch on final message or error
* chore: update docs, .env.example with token_usage info, add balance script command
* fix(Balance): fallback to empty obj for balance query
* style: slight adjustment of balance element
* docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
balance : number ;
tokenCost : number ;
promptTokens : number ;
prev_count : number ;
violation_count : number ;
date : Date ;
generations? : TOpenAIMessage [ ] ;
} ;
2024-04-21 08:31:54 -04:00
type TExpiredKey = {
expiredAt : string ;
endpoint : string ;
} ;
2025-02-06 18:13:18 -05:00
type TGenericError = {
2024-08-30 15:01:29 -04:00
info : string ;
} ;
feat: Message Rate Limiters, Violation Logging, & Ban System 🔨 (#903)
* refactor: require Auth middleware in route index files
* feat: concurrent message limiter
* feat: complete concurrent message limiter with caching
* refactor: SSE response methods separated from handleText
* fix(abortMiddleware): fix req and res order to standard, use endpointOption in req.body
* chore: minor name changes
* refactor: add isUUID condition to saveMessage
* fix(concurrentLimiter): logic correctly handles the max number of concurrent messages and res closing/finalization
* chore: bump keyv and remove console.log from Message
* fix(concurrentLimiter): ensure messages are only saved in later message children
* refactor(concurrentLimiter): use KeyvFile instead, could make other stores configurable in the future
* feat: add denyRequest function for error responses
* feat(utils): add isStringTruthy function
Introduce the isStringTruthy function to the utilities module to check if a string value is a case-insensitive match for 'true'
* feat: add optional message rate limiters by IP and userId
* feat: add optional message rate limiters by IP and userId to edit route
* refactor: rename isStringTruthy to isTrue for brevity
* refactor(getError): use map to make code cleaner
* refactor: use memory for concurrent rate limiter to prevent clearing on startup/exit, add multiple log files, fix error message for concurrent violation
* feat: check if errorMessage is object, stringify if so
* chore: send object to denyRequest which will stringify it
* feat: log excessive requests
* fix(getError): correctly pluralize messages
* refactor(limiters): make type consistent between logs and errorMessage
* refactor(cache): move files out of lib/db into separate cache dir
>> feat: add getLogStores function so Keyv instance is not redundantly created on every violation
feat: separate violation logging to own function with logViolation
* fix: cache/index.js export, properly record userViolations
* refactor(messageLimiters): use new logging method, add logging to registrations
* refactor(logViolation): make userLogs an array of logs per user
* feat: add logging to login limiter
* refactor: pass req as first param to logViolation and record offending IP
* refactor: rename isTrue helper fn to isEnabled
* feat: add simple non_browser check and log violation
* fix: open handles in unit tests, remove KeyvMongo as not used and properly mock global fetch
* chore: adjust nodemon ignore paths to properly ignore logs
* feat: add math helper function for safe use of eval
* refactor(api/convos): use middleware at top of file to avoid redundancy
* feat: add delete all static method for Sessions
* fix: redirect to login on refresh if user is not found, or the session is not found but hasn't expired (ban case)
* refactor(getLogStores): adjust return type
* feat: add ban violation and check ban logic
refactor(logViolation): pass both req and res objects
* feat: add removePorts helper function
* refactor: rename getError to getMessageError and add getLoginError for displaying different login errors
* fix(AuthContext): fix type issue and remove unused code
* refactor(bans): ban by ip and user id, send response based on origin
* chore: add frontend ban messages
* refactor(routes/oauth): add ban check to handler, also consolidate logic to avoid redundancy
* feat: add ban check to AI messaging routes
* feat: add ban check to login/registration
* fix(ci/api): mock KeyvMongo to avoid tests hanging
* docs: update .env.example
> refactor(banViolation): calculate interval rate crossover, early return if duration is invalid
ci(banViolation): add tests to ensure users are only banned when expected
* docs: improve wording for mod system
* feat: add configurable env variables for violation scores
* chore: add jsdoc for uaParser.js
* chore: improve ban text log
* chore: update bun test scripts
* refactor(math.js): add fallback values
* fix(KeyvMongo/banLogs): refactor keyv instances to top of files to avoid memory leaks, refactor ban logic to use getLogStores instead
refactor(getLogStores): get a single log store by type
* fix(ci): refactor tests due to banLogs changes, also make sure to clear and revoke sessions even if ban duration is 0
* fix(banViolation.js): getLogStores import
* feat: handle 500 code error at login
* fix(middleware): handle case where user.id is _id and not just id
* ci: add ban secrets for backend unit tests
* refactor: logout user upon ban
* chore: log session delete message only if deletedCount > 0
* refactor: change default ban duration (2h) and make logic more clear in JSDOC
* fix: login and registration limiters will now return rate limiting error
* fix: userId not parsable as non ObjectId string
* feat: add useTimeout hook to properly clear timeouts when invoking functions within them
refactor(AuthContext): cleanup code by using new hook and defining types in ~/common
* fix: login error message for rate limits
* docs: add info for automated mod system and rate limiters, update other docs accordingly
* chore: bump data-provider version
2023-09-13 10:57:07 -04:00
const errorMessages = {
2024-04-21 08:31:54 -04:00
[ ErrorTypes . MODERATION ] : 'com_error_moderation' ,
[ ErrorTypes . NO_USER_KEY ] : 'com_error_no_user_key' ,
[ ErrorTypes . INVALID_USER_KEY ] : 'com_error_invalid_user_key' ,
[ ErrorTypes . NO_BASE_URL ] : 'com_error_no_base_url' ,
2024-12-12 12:52:42 -05:00
[ ErrorTypes . INVALID_ACTION ] : ` com_error_ ${ ErrorTypes . INVALID_ACTION } ` ,
2024-09-12 18:15:43 -04:00
[ ErrorTypes . INVALID_REQUEST ] : ` com_error_ ${ ErrorTypes . INVALID_REQUEST } ` ,
[ ErrorTypes . NO_SYSTEM_MESSAGES ] : ` com_error_ ${ ErrorTypes . NO_SYSTEM_MESSAGES } ` ,
2024-04-21 08:31:54 -04:00
[ ErrorTypes . EXPIRED_USER_KEY ] : ( json : TExpiredKey , localize : LocalizeFunction ) = > {
const { expiredAt , endpoint } = json ;
2025-02-09 18:05:31 +01:00
return localize ( 'com_error_expired_user_key' , { 0 : endpoint , 1 : expiredAt } ) ;
2024-04-21 08:31:54 -04:00
} ,
2025-02-06 18:13:18 -05:00
[ ErrorTypes . INPUT_LENGTH ] : ( json : TGenericError , localize : LocalizeFunction ) = > {
2024-08-30 15:01:29 -04:00
const { info } = json ;
2025-02-09 18:05:31 +01:00
return localize ( 'com_error_input_length' , { 0 : info } ) ;
2024-08-30 15:01:29 -04:00
} ,
2025-02-06 18:13:18 -05:00
[ ErrorTypes . GOOGLE_ERROR ] : ( json : TGenericError ) = > {
const { info } = json ;
return info ;
} ,
2024-04-21 08:31:54 -04:00
[ ViolationTypes . BAN ] :
'Your account has been temporarily banned due to violations of our service.' ,
feat: Message Rate Limiters, Violation Logging, & Ban System 🔨 (#903)
* refactor: require Auth middleware in route index files
* feat: concurrent message limiter
* feat: complete concurrent message limiter with caching
* refactor: SSE response methods separated from handleText
* fix(abortMiddleware): fix req and res order to standard, use endpointOption in req.body
* chore: minor name changes
* refactor: add isUUID condition to saveMessage
* fix(concurrentLimiter): logic correctly handles the max number of concurrent messages and res closing/finalization
* chore: bump keyv and remove console.log from Message
* fix(concurrentLimiter): ensure messages are only saved in later message children
* refactor(concurrentLimiter): use KeyvFile instead, could make other stores configurable in the future
* feat: add denyRequest function for error responses
* feat(utils): add isStringTruthy function
Introduce the isStringTruthy function to the utilities module to check if a string value is a case-insensitive match for 'true'
* feat: add optional message rate limiters by IP and userId
* feat: add optional message rate limiters by IP and userId to edit route
* refactor: rename isStringTruthy to isTrue for brevity
* refactor(getError): use map to make code cleaner
* refactor: use memory for concurrent rate limiter to prevent clearing on startup/exit, add multiple log files, fix error message for concurrent violation
* feat: check if errorMessage is object, stringify if so
* chore: send object to denyRequest which will stringify it
* feat: log excessive requests
* fix(getError): correctly pluralize messages
* refactor(limiters): make type consistent between logs and errorMessage
* refactor(cache): move files out of lib/db into separate cache dir
>> feat: add getLogStores function so Keyv instance is not redundantly created on every violation
feat: separate violation logging to own function with logViolation
* fix: cache/index.js export, properly record userViolations
* refactor(messageLimiters): use new logging method, add logging to registrations
* refactor(logViolation): make userLogs an array of logs per user
* feat: add logging to login limiter
* refactor: pass req as first param to logViolation and record offending IP
* refactor: rename isTrue helper fn to isEnabled
* feat: add simple non_browser check and log violation
* fix: open handles in unit tests, remove KeyvMongo as not used and properly mock global fetch
* chore: adjust nodemon ignore paths to properly ignore logs
* feat: add math helper function for safe use of eval
* refactor(api/convos): use middleware at top of file to avoid redundancy
* feat: add delete all static method for Sessions
* fix: redirect to login on refresh if user is not found, or the session is not found but hasn't expired (ban case)
* refactor(getLogStores): adjust return type
* feat: add ban violation and check ban logic
refactor(logViolation): pass both req and res objects
* feat: add removePorts helper function
* refactor: rename getError to getMessageError and add getLoginError for displaying different login errors
* fix(AuthContext): fix type issue and remove unused code
* refactor(bans): ban by ip and user id, send response based on origin
* chore: add frontend ban messages
* refactor(routes/oauth): add ban check to handler, also consolidate logic to avoid redundancy
* feat: add ban check to AI messaging routes
* feat: add ban check to login/registration
* fix(ci/api): mock KeyvMongo to avoid tests hanging
* docs: update .env.example
> refactor(banViolation): calculate interval rate crossover, early return if duration is invalid
ci(banViolation): add tests to ensure users are only banned when expected
* docs: improve wording for mod system
* feat: add configurable env variables for violation scores
* chore: add jsdoc for uaParser.js
* chore: improve ban text log
* chore: update bun test scripts
* refactor(math.js): add fallback values
* fix(KeyvMongo/banLogs): refactor keyv instances to top of files to avoid memory leaks, refactor ban logic to use getLogStores instead
refactor(getLogStores): get a single log store by type
* fix(ci): refactor tests due to banLogs changes, also make sure to clear and revoke sessions even if ban duration is 0
* fix(banViolation.js): getLogStores import
* feat: handle 500 code error at login
* fix(middleware): handle case where user.id is _id and not just id
* ci: add ban secrets for backend unit tests
* refactor: logout user upon ban
* chore: log session delete message only if deletedCount > 0
* refactor: change default ban duration (2h) and make logic more clear in JSDOC
* fix: login and registration limiters will now return rate limiting error
* fix: userId not parsable as non ObjectId string
* feat: add useTimeout hook to properly clear timeouts when invoking functions within them
refactor(AuthContext): cleanup code by using new hook and defining types in ~/common
* fix: login error message for rate limits
* docs: add info for automated mod system and rate limiters, update other docs accordingly
* chore: bump data-provider version
2023-09-13 10:57:07 -04:00
invalid_api_key :
'Invalid API key. Please check your API key and try again. You can do this by clicking on the model logo in the left corner of the textbox and selecting "Set Token" for the current selected endpoint. Thank you for your understanding.' ,
insufficient_quota :
'We apologize for any inconvenience caused. The default API key has reached its limit. To continue using this service, please set up your own API key. You can do this by clicking on the model logo in the left corner of the textbox and selecting "Set Token" for the current selected endpoint. Thank you for your understanding.' ,
concurrent : ( json : TConcurrent ) = > {
const { limit } = json ;
const plural = limit > 1 ? 's' : '' ;
return ` Only ${ limit } message ${ plural } at a time. Please allow any other responses to complete before sending another message, or wait one minute. ` ;
} ,
message_limit : ( json : TMessageLimit ) = > {
const { max , windowInMinutes } = json ;
const plural = max > 1 ? 's' : '' ;
return ` You hit the message limit. You have a cap of ${ max } message ${ plural } per ${
windowInMinutes > 1 ? ` ${ windowInMinutes } minutes ` : 'minute'
} . ` ;
} ,
feat: Accurate Token Usage Tracking & Optional Balance (#1018)
* refactor(Chains/llms): allow passing callbacks
* refactor(BaseClient): accurately count completion tokens as generation only
* refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM
* wip: summary prompt tokens
* refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end
wip: draft out relevant providers and variables for token tracing
* refactor(createLLM): make streaming prop false by default
* chore: remove use of getTokenCountForResponse
* refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace
* chore: remove passing of streaming prop, also console log useful vars for tracing
* feat: formatFromLangChain helper function to count tokens for ChatModelStart
* refactor(initializeLLM): add role for LLM tracing
* chore(formatFromLangChain): update JSDoc
* feat(formatMessages): formats langChain messages into OpenAI payload format
* chore: install openai-chat-tokens
* refactor(formatMessage): optimize conditional langChain logic
fix(formatFromLangChain): fix destructuring
* feat: accurate prompt tokens for ChatModelStart before generation
* refactor(handleChatModelStart): move to callbacks dir, use factory function
* refactor(initializeLLM): rename 'role' to 'context'
* feat(Balance/Transaction): new schema/models for tracking token spend
refactor(Key): factor out model export to separate file
* refactor(initializeClient): add req,res objects to client options
* feat: add-balance script to add to an existing users' token balance
refactor(Transaction): use multiplier map/function, return balance update
* refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match
* refactor(Tx): add fair fallback value multiplier incase the config result is undefined
* refactor(Balance): rename 'tokens' to 'tokenCredits'
* feat: balance check, add tx.js for new tx-related methods and tests
* chore(summaryPrompts): update prompt token count
* refactor(callbacks): pass req, res
wip: check balance
* refactor(Tx): make convoId a String type, fix(calculateTokenValue)
* refactor(BaseClient): add conversationId as client prop when assigned
* feat(RunManager): track LLM runs with manager, track token spend from LLM,
refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls
* feat(spendTokens): helper to spend prompt/completion tokens
* feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds
refactor(Balance): static check method to return object instead of boolean now
wip(OpenAIClient): implement use of checkBalance
* refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large
refactor(OpenAIClient): add checkBalance
refactor(createStartHandler): add checkBalance
* chore: remove prompt and completion token logging from route handler
* chore(spendTokens): add JSDoc
* feat(logTokenCost): record transactions for basic api calls
* chore(ask/edit): invoke getResponseSender only once per API call
* refactor(ask/edit): pass promptTokens to getIds and include in abort data
* refactor(getIds -> getReqData): rename function
* refactor(Tx): increase value if incomplete message
* feat: record tokenUsage when message is aborted
* refactor: subtract tokens when payload includes function_call
* refactor: add namespace for token_balance
* fix(spendTokens): only execute if corresponding token type amounts are defined
* refactor(checkBalance): throws Error if not enough token credits
* refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run'
* fix(abortMiddleware): circular dependency, and default to empty string for completionTokens
* fix: properly cancel title requests when there isn't enough tokens to generate
* feat(predictNewSummary): custom chain for summaries to allow signal passing
refactor(summaryBuffer): use new custom chain
* feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError
* refactor(createStartHandler): if summary, add error details to runs
* fix(OpenAIClient): support aborting from summarization & showing error to user
refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer
* refactor(logTokenCost -> recordTokenUsage): rename
* refactor(checkBalance): include promptTokens in errorMessage
* refactor(checkBalance/spendTokens): move to models dir
* fix(createLanguageChain): correctly pass config
* refactor(initializeLLM/title): add tokenBuffer of 150 for balance check
* refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called
* refactor(createStartHandler): add error to run if context is plugins as well
* refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run
* refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic
* chore: use absolute equality for addTitle condition
* refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional
* style: icon changes to match official
* fix(BaseClient): getTokenCountForResponse -> getTokenCount
* fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc
* refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled
* fix(e2e/cleanUp): cleanup new collections, import all model methods from index
* fix(config/add-balance): add uncaughtException listener
* fix: circular dependency
* refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance
* fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped
* fix(createStartHandler): correct condition for generations
* chore: bump postcss due to moderate severity vulnerability
* chore: bump zod due to low severity vulnerability
* chore: bump openai & data-provider version
* feat(types): OpenAI Message types
* chore: update bun lockfile
* refactor(CodeBlock): add error block formatting
* refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON
* chore(logViolation): delete user_id after error is logged
* refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex
* fix(DALL-E): use latest openai SDK
* chore: reorganize imports, fix type issue
* feat(server): add balance route
* fix(api/models): add auth
* feat(data-provider): /api/balance query
* feat: show balance if checking is enabled, refetch on final message or error
* chore: update docs, .env.example with token_usage info, add balance script command
* fix(Balance): fallback to empty obj for balance query
* style: slight adjustment of balance element
* docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
token_balance : ( json : TTokenBalance ) = > {
const { balance , tokenCost , promptTokens , generations } = json ;
const message = ` Insufficient Funds! Balance: ${ balance } . Prompt tokens: ${ promptTokens } . Cost: ${ tokenCost } . ` ;
return (
< >
{ message }
{ generations && (
< >
< br / >
< br / >
< / >
) }
{ generations && (
< CodeBlock
lang = "Generations"
error = { true }
codeChildren = { formatJSON ( JSON . stringify ( generations ) ) }
/ >
) }
< / >
) ;
} ,
feat: Message Rate Limiters, Violation Logging, & Ban System 🔨 (#903)
* refactor: require Auth middleware in route index files
* feat: concurrent message limiter
* feat: complete concurrent message limiter with caching
* refactor: SSE response methods separated from handleText
* fix(abortMiddleware): fix req and res order to standard, use endpointOption in req.body
* chore: minor name changes
* refactor: add isUUID condition to saveMessage
* fix(concurrentLimiter): logic correctly handles the max number of concurrent messages and res closing/finalization
* chore: bump keyv and remove console.log from Message
* fix(concurrentLimiter): ensure messages are only saved in later message children
* refactor(concurrentLimiter): use KeyvFile instead, could make other stores configurable in the future
* feat: add denyRequest function for error responses
* feat(utils): add isStringTruthy function
Introduce the isStringTruthy function to the utilities module to check if a string value is a case-insensitive match for 'true'
* feat: add optional message rate limiters by IP and userId
* feat: add optional message rate limiters by IP and userId to edit route
* refactor: rename isStringTruthy to isTrue for brevity
* refactor(getError): use map to make code cleaner
* refactor: use memory for concurrent rate limiter to prevent clearing on startup/exit, add multiple log files, fix error message for concurrent violation
* feat: check if errorMessage is object, stringify if so
* chore: send object to denyRequest which will stringify it
* feat: log excessive requests
* fix(getError): correctly pluralize messages
* refactor(limiters): make type consistent between logs and errorMessage
* refactor(cache): move files out of lib/db into separate cache dir
>> feat: add getLogStores function so Keyv instance is not redundantly created on every violation
feat: separate violation logging to own function with logViolation
* fix: cache/index.js export, properly record userViolations
* refactor(messageLimiters): use new logging method, add logging to registrations
* refactor(logViolation): make userLogs an array of logs per user
* feat: add logging to login limiter
* refactor: pass req as first param to logViolation and record offending IP
* refactor: rename isTrue helper fn to isEnabled
* feat: add simple non_browser check and log violation
* fix: open handles in unit tests, remove KeyvMongo as not used and properly mock global fetch
* chore: adjust nodemon ignore paths to properly ignore logs
* feat: add math helper function for safe use of eval
* refactor(api/convos): use middleware at top of file to avoid redundancy
* feat: add delete all static method for Sessions
* fix: redirect to login on refresh if user is not found, or the session is not found but hasn't expired (ban case)
* refactor(getLogStores): adjust return type
* feat: add ban violation and check ban logic
refactor(logViolation): pass both req and res objects
* feat: add removePorts helper function
* refactor: rename getError to getMessageError and add getLoginError for displaying different login errors
* fix(AuthContext): fix type issue and remove unused code
* refactor(bans): ban by ip and user id, send response based on origin
* chore: add frontend ban messages
* refactor(routes/oauth): add ban check to handler, also consolidate logic to avoid redundancy
* feat: add ban check to AI messaging routes
* feat: add ban check to login/registration
* fix(ci/api): mock KeyvMongo to avoid tests hanging
* docs: update .env.example
> refactor(banViolation): calculate interval rate crossover, early return if duration is invalid
ci(banViolation): add tests to ensure users are only banned when expected
* docs: improve wording for mod system
* feat: add configurable env variables for violation scores
* chore: add jsdoc for uaParser.js
* chore: improve ban text log
* chore: update bun test scripts
* refactor(math.js): add fallback values
* fix(KeyvMongo/banLogs): refactor keyv instances to top of files to avoid memory leaks, refactor ban logic to use getLogStores instead
refactor(getLogStores): get a single log store by type
* fix(ci): refactor tests due to banLogs changes, also make sure to clear and revoke sessions even if ban duration is 0
* fix(banViolation.js): getLogStores import
* feat: handle 500 code error at login
* fix(middleware): handle case where user.id is _id and not just id
* ci: add ban secrets for backend unit tests
* refactor: logout user upon ban
* chore: log session delete message only if deletedCount > 0
* refactor: change default ban duration (2h) and make logic more clear in JSDOC
* fix: login and registration limiters will now return rate limiting error
* fix: userId not parsable as non ObjectId string
* feat: add useTimeout hook to properly clear timeouts when invoking functions within them
refactor(AuthContext): cleanup code by using new hook and defining types in ~/common
* fix: login error message for rate limits
* docs: add info for automated mod system and rate limiters, update other docs accordingly
* chore: bump data-provider version
2023-09-13 10:57:07 -04:00
} ;
feat: Accurate Token Usage Tracking & Optional Balance (#1018)
* refactor(Chains/llms): allow passing callbacks
* refactor(BaseClient): accurately count completion tokens as generation only
* refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM
* wip: summary prompt tokens
* refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end
wip: draft out relevant providers and variables for token tracing
* refactor(createLLM): make streaming prop false by default
* chore: remove use of getTokenCountForResponse
* refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace
* chore: remove passing of streaming prop, also console log useful vars for tracing
* feat: formatFromLangChain helper function to count tokens for ChatModelStart
* refactor(initializeLLM): add role for LLM tracing
* chore(formatFromLangChain): update JSDoc
* feat(formatMessages): formats langChain messages into OpenAI payload format
* chore: install openai-chat-tokens
* refactor(formatMessage): optimize conditional langChain logic
fix(formatFromLangChain): fix destructuring
* feat: accurate prompt tokens for ChatModelStart before generation
* refactor(handleChatModelStart): move to callbacks dir, use factory function
* refactor(initializeLLM): rename 'role' to 'context'
* feat(Balance/Transaction): new schema/models for tracking token spend
refactor(Key): factor out model export to separate file
* refactor(initializeClient): add req,res objects to client options
* feat: add-balance script to add to an existing users' token balance
refactor(Transaction): use multiplier map/function, return balance update
* refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match
* refactor(Tx): add fair fallback value multiplier incase the config result is undefined
* refactor(Balance): rename 'tokens' to 'tokenCredits'
* feat: balance check, add tx.js for new tx-related methods and tests
* chore(summaryPrompts): update prompt token count
* refactor(callbacks): pass req, res
wip: check balance
* refactor(Tx): make convoId a String type, fix(calculateTokenValue)
* refactor(BaseClient): add conversationId as client prop when assigned
* feat(RunManager): track LLM runs with manager, track token spend from LLM,
refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls
* feat(spendTokens): helper to spend prompt/completion tokens
* feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds
refactor(Balance): static check method to return object instead of boolean now
wip(OpenAIClient): implement use of checkBalance
* refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large
refactor(OpenAIClient): add checkBalance
refactor(createStartHandler): add checkBalance
* chore: remove prompt and completion token logging from route handler
* chore(spendTokens): add JSDoc
* feat(logTokenCost): record transactions for basic api calls
* chore(ask/edit): invoke getResponseSender only once per API call
* refactor(ask/edit): pass promptTokens to getIds and include in abort data
* refactor(getIds -> getReqData): rename function
* refactor(Tx): increase value if incomplete message
* feat: record tokenUsage when message is aborted
* refactor: subtract tokens when payload includes function_call
* refactor: add namespace for token_balance
* fix(spendTokens): only execute if corresponding token type amounts are defined
* refactor(checkBalance): throws Error if not enough token credits
* refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run'
* fix(abortMiddleware): circular dependency, and default to empty string for completionTokens
* fix: properly cancel title requests when there isn't enough tokens to generate
* feat(predictNewSummary): custom chain for summaries to allow signal passing
refactor(summaryBuffer): use new custom chain
* feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError
* refactor(createStartHandler): if summary, add error details to runs
* fix(OpenAIClient): support aborting from summarization & showing error to user
refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer
* refactor(logTokenCost -> recordTokenUsage): rename
* refactor(checkBalance): include promptTokens in errorMessage
* refactor(checkBalance/spendTokens): move to models dir
* fix(createLanguageChain): correctly pass config
* refactor(initializeLLM/title): add tokenBuffer of 150 for balance check
* refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called
* refactor(createStartHandler): add error to run if context is plugins as well
* refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run
* refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic
* chore: use absolute equality for addTitle condition
* refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional
* style: icon changes to match official
* fix(BaseClient): getTokenCountForResponse -> getTokenCount
* fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc
* refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled
* fix(e2e/cleanUp): cleanup new collections, import all model methods from index
* fix(config/add-balance): add uncaughtException listener
* fix: circular dependency
* refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance
* fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped
* fix(createStartHandler): correct condition for generations
* chore: bump postcss due to moderate severity vulnerability
* chore: bump zod due to low severity vulnerability
* chore: bump openai & data-provider version
* feat(types): OpenAI Message types
* chore: update bun lockfile
* refactor(CodeBlock): add error block formatting
* refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON
* chore(logViolation): delete user_id after error is logged
* refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex
* fix(DALL-E): use latest openai SDK
* chore: reorganize imports, fix type issue
* feat(server): add balance route
* fix(api/models): add auth
* feat(data-provider): /api/balance query
* feat: show balance if checking is enabled, refetch on final message or error
* chore: update docs, .env.example with token_usage info, add balance script command
* fix(Balance): fallback to empty obj for balance query
* style: slight adjustment of balance element
* docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
const Error = ( { text } : { text : string } ) = > {
2024-04-21 08:31:54 -04:00
const localize = useLocalize ( ) ;
feat: Accurate Token Usage Tracking & Optional Balance (#1018)
* refactor(Chains/llms): allow passing callbacks
* refactor(BaseClient): accurately count completion tokens as generation only
* refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM
* wip: summary prompt tokens
* refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end
wip: draft out relevant providers and variables for token tracing
* refactor(createLLM): make streaming prop false by default
* chore: remove use of getTokenCountForResponse
* refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace
* chore: remove passing of streaming prop, also console log useful vars for tracing
* feat: formatFromLangChain helper function to count tokens for ChatModelStart
* refactor(initializeLLM): add role for LLM tracing
* chore(formatFromLangChain): update JSDoc
* feat(formatMessages): formats langChain messages into OpenAI payload format
* chore: install openai-chat-tokens
* refactor(formatMessage): optimize conditional langChain logic
fix(formatFromLangChain): fix destructuring
* feat: accurate prompt tokens for ChatModelStart before generation
* refactor(handleChatModelStart): move to callbacks dir, use factory function
* refactor(initializeLLM): rename 'role' to 'context'
* feat(Balance/Transaction): new schema/models for tracking token spend
refactor(Key): factor out model export to separate file
* refactor(initializeClient): add req,res objects to client options
* feat: add-balance script to add to an existing users' token balance
refactor(Transaction): use multiplier map/function, return balance update
* refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match
* refactor(Tx): add fair fallback value multiplier incase the config result is undefined
* refactor(Balance): rename 'tokens' to 'tokenCredits'
* feat: balance check, add tx.js for new tx-related methods and tests
* chore(summaryPrompts): update prompt token count
* refactor(callbacks): pass req, res
wip: check balance
* refactor(Tx): make convoId a String type, fix(calculateTokenValue)
* refactor(BaseClient): add conversationId as client prop when assigned
* feat(RunManager): track LLM runs with manager, track token spend from LLM,
refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls
* feat(spendTokens): helper to spend prompt/completion tokens
* feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds
refactor(Balance): static check method to return object instead of boolean now
wip(OpenAIClient): implement use of checkBalance
* refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large
refactor(OpenAIClient): add checkBalance
refactor(createStartHandler): add checkBalance
* chore: remove prompt and completion token logging from route handler
* chore(spendTokens): add JSDoc
* feat(logTokenCost): record transactions for basic api calls
* chore(ask/edit): invoke getResponseSender only once per API call
* refactor(ask/edit): pass promptTokens to getIds and include in abort data
* refactor(getIds -> getReqData): rename function
* refactor(Tx): increase value if incomplete message
* feat: record tokenUsage when message is aborted
* refactor: subtract tokens when payload includes function_call
* refactor: add namespace for token_balance
* fix(spendTokens): only execute if corresponding token type amounts are defined
* refactor(checkBalance): throws Error if not enough token credits
* refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run'
* fix(abortMiddleware): circular dependency, and default to empty string for completionTokens
* fix: properly cancel title requests when there isn't enough tokens to generate
* feat(predictNewSummary): custom chain for summaries to allow signal passing
refactor(summaryBuffer): use new custom chain
* feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError
* refactor(createStartHandler): if summary, add error details to runs
* fix(OpenAIClient): support aborting from summarization & showing error to user
refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer
* refactor(logTokenCost -> recordTokenUsage): rename
* refactor(checkBalance): include promptTokens in errorMessage
* refactor(checkBalance/spendTokens): move to models dir
* fix(createLanguageChain): correctly pass config
* refactor(initializeLLM/title): add tokenBuffer of 150 for balance check
* refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called
* refactor(createStartHandler): add error to run if context is plugins as well
* refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run
* refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic
* chore: use absolute equality for addTitle condition
* refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional
* style: icon changes to match official
* fix(BaseClient): getTokenCountForResponse -> getTokenCount
* fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc
* refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled
* fix(e2e/cleanUp): cleanup new collections, import all model methods from index
* fix(config/add-balance): add uncaughtException listener
* fix: circular dependency
* refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance
* fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped
* fix(createStartHandler): correct condition for generations
* chore: bump postcss due to moderate severity vulnerability
* chore: bump zod due to low severity vulnerability
* chore: bump openai & data-provider version
* feat(types): OpenAI Message types
* chore: update bun lockfile
* refactor(CodeBlock): add error block formatting
* refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON
* chore(logViolation): delete user_id after error is logged
* refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex
* fix(DALL-E): use latest openai SDK
* chore: reorganize imports, fix type issue
* feat(server): add balance route
* fix(api/models): add auth
* feat(data-provider): /api/balance query
* feat: show balance if checking is enabled, refetch on final message or error
* chore: update docs, .env.example with token_usage info, add balance script command
* fix(Balance): fallback to empty obj for balance query
* style: slight adjustment of balance element
* docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
const jsonString = extractJson ( text ) ;
const errorMessage = text . length > 512 && ! jsonString ? text . slice ( 0 , 512 ) + '...' : text ;
feat: Message Rate Limiters, Violation Logging, & Ban System 🔨 (#903)
* refactor: require Auth middleware in route index files
* feat: concurrent message limiter
* feat: complete concurrent message limiter with caching
* refactor: SSE response methods separated from handleText
* fix(abortMiddleware): fix req and res order to standard, use endpointOption in req.body
* chore: minor name changes
* refactor: add isUUID condition to saveMessage
* fix(concurrentLimiter): logic correctly handles the max number of concurrent messages and res closing/finalization
* chore: bump keyv and remove console.log from Message
* fix(concurrentLimiter): ensure messages are only saved in later message children
* refactor(concurrentLimiter): use KeyvFile instead, could make other stores configurable in the future
* feat: add denyRequest function for error responses
* feat(utils): add isStringTruthy function
Introduce the isStringTruthy function to the utilities module to check if a string value is a case-insensitive match for 'true'
* feat: add optional message rate limiters by IP and userId
* feat: add optional message rate limiters by IP and userId to edit route
* refactor: rename isStringTruthy to isTrue for brevity
* refactor(getError): use map to make code cleaner
* refactor: use memory for concurrent rate limiter to prevent clearing on startup/exit, add multiple log files, fix error message for concurrent violation
* feat: check if errorMessage is object, stringify if so
* chore: send object to denyRequest which will stringify it
* feat: log excessive requests
* fix(getError): correctly pluralize messages
* refactor(limiters): make type consistent between logs and errorMessage
* refactor(cache): move files out of lib/db into separate cache dir
>> feat: add getLogStores function so Keyv instance is not redundantly created on every violation
feat: separate violation logging to own function with logViolation
* fix: cache/index.js export, properly record userViolations
* refactor(messageLimiters): use new logging method, add logging to registrations
* refactor(logViolation): make userLogs an array of logs per user
* feat: add logging to login limiter
* refactor: pass req as first param to logViolation and record offending IP
* refactor: rename isTrue helper fn to isEnabled
* feat: add simple non_browser check and log violation
* fix: open handles in unit tests, remove KeyvMongo as not used and properly mock global fetch
* chore: adjust nodemon ignore paths to properly ignore logs
* feat: add math helper function for safe use of eval
* refactor(api/convos): use middleware at top of file to avoid redundancy
* feat: add delete all static method for Sessions
* fix: redirect to login on refresh if user is not found, or the session is not found but hasn't expired (ban case)
* refactor(getLogStores): adjust return type
* feat: add ban violation and check ban logic
refactor(logViolation): pass both req and res objects
* feat: add removePorts helper function
* refactor: rename getError to getMessageError and add getLoginError for displaying different login errors
* fix(AuthContext): fix type issue and remove unused code
* refactor(bans): ban by ip and user id, send response based on origin
* chore: add frontend ban messages
* refactor(routes/oauth): add ban check to handler, also consolidate logic to avoid redundancy
* feat: add ban check to AI messaging routes
* feat: add ban check to login/registration
* fix(ci/api): mock KeyvMongo to avoid tests hanging
* docs: update .env.example
> refactor(banViolation): calculate interval rate crossover, early return if duration is invalid
ci(banViolation): add tests to ensure users are only banned when expected
* docs: improve wording for mod system
* feat: add configurable env variables for violation scores
* chore: add jsdoc for uaParser.js
* chore: improve ban text log
* chore: update bun test scripts
* refactor(math.js): add fallback values
* fix(KeyvMongo/banLogs): refactor keyv instances to top of files to avoid memory leaks, refactor ban logic to use getLogStores instead
refactor(getLogStores): get a single log store by type
* fix(ci): refactor tests due to banLogs changes, also make sure to clear and revoke sessions even if ban duration is 0
* fix(banViolation.js): getLogStores import
* feat: handle 500 code error at login
* fix(middleware): handle case where user.id is _id and not just id
* ci: add ban secrets for backend unit tests
* refactor: logout user upon ban
* chore: log session delete message only if deletedCount > 0
* refactor: change default ban duration (2h) and make logic more clear in JSDOC
* fix: login and registration limiters will now return rate limiting error
* fix: userId not parsable as non ObjectId string
* feat: add useTimeout hook to properly clear timeouts when invoking functions within them
refactor(AuthContext): cleanup code by using new hook and defining types in ~/common
* fix: login error message for rate limits
* docs: add info for automated mod system and rate limiters, update other docs accordingly
* chore: bump data-provider version
2023-09-13 10:57:07 -04:00
const defaultResponse = ` Something went wrong. Here's the specific error message we encountered: ${ errorMessage } ` ;
if ( ! isJson ( jsonString ) ) {
return defaultResponse ;
}
const json = JSON . parse ( jsonString ) ;
const errorKey = json . code || json . type ;
const keyExists = errorKey && errorMessages [ errorKey ] ;
if ( keyExists && typeof errorMessages [ errorKey ] === 'function' ) {
2024-04-21 08:31:54 -04:00
return errorMessages [ errorKey ] ( json , localize ) ;
} else if ( keyExists && keyExists . startsWith ( localizedErrorPrefix ) ) {
return localize ( errorMessages [ errorKey ] ) ;
feat: Message Rate Limiters, Violation Logging, & Ban System 🔨 (#903)
* refactor: require Auth middleware in route index files
* feat: concurrent message limiter
* feat: complete concurrent message limiter with caching
* refactor: SSE response methods separated from handleText
* fix(abortMiddleware): fix req and res order to standard, use endpointOption in req.body
* chore: minor name changes
* refactor: add isUUID condition to saveMessage
* fix(concurrentLimiter): logic correctly handles the max number of concurrent messages and res closing/finalization
* chore: bump keyv and remove console.log from Message
* fix(concurrentLimiter): ensure messages are only saved in later message children
* refactor(concurrentLimiter): use KeyvFile instead, could make other stores configurable in the future
* feat: add denyRequest function for error responses
* feat(utils): add isStringTruthy function
Introduce the isStringTruthy function to the utilities module to check if a string value is a case-insensitive match for 'true'
* feat: add optional message rate limiters by IP and userId
* feat: add optional message rate limiters by IP and userId to edit route
* refactor: rename isStringTruthy to isTrue for brevity
* refactor(getError): use map to make code cleaner
* refactor: use memory for concurrent rate limiter to prevent clearing on startup/exit, add multiple log files, fix error message for concurrent violation
* feat: check if errorMessage is object, stringify if so
* chore: send object to denyRequest which will stringify it
* feat: log excessive requests
* fix(getError): correctly pluralize messages
* refactor(limiters): make type consistent between logs and errorMessage
* refactor(cache): move files out of lib/db into separate cache dir
>> feat: add getLogStores function so Keyv instance is not redundantly created on every violation
feat: separate violation logging to own function with logViolation
* fix: cache/index.js export, properly record userViolations
* refactor(messageLimiters): use new logging method, add logging to registrations
* refactor(logViolation): make userLogs an array of logs per user
* feat: add logging to login limiter
* refactor: pass req as first param to logViolation and record offending IP
* refactor: rename isTrue helper fn to isEnabled
* feat: add simple non_browser check and log violation
* fix: open handles in unit tests, remove KeyvMongo as not used and properly mock global fetch
* chore: adjust nodemon ignore paths to properly ignore logs
* feat: add math helper function for safe use of eval
* refactor(api/convos): use middleware at top of file to avoid redundancy
* feat: add delete all static method for Sessions
* fix: redirect to login on refresh if user is not found, or the session is not found but hasn't expired (ban case)
* refactor(getLogStores): adjust return type
* feat: add ban violation and check ban logic
refactor(logViolation): pass both req and res objects
* feat: add removePorts helper function
* refactor: rename getError to getMessageError and add getLoginError for displaying different login errors
* fix(AuthContext): fix type issue and remove unused code
* refactor(bans): ban by ip and user id, send response based on origin
* chore: add frontend ban messages
* refactor(routes/oauth): add ban check to handler, also consolidate logic to avoid redundancy
* feat: add ban check to AI messaging routes
* feat: add ban check to login/registration
* fix(ci/api): mock KeyvMongo to avoid tests hanging
* docs: update .env.example
> refactor(banViolation): calculate interval rate crossover, early return if duration is invalid
ci(banViolation): add tests to ensure users are only banned when expected
* docs: improve wording for mod system
* feat: add configurable env variables for violation scores
* chore: add jsdoc for uaParser.js
* chore: improve ban text log
* chore: update bun test scripts
* refactor(math.js): add fallback values
* fix(KeyvMongo/banLogs): refactor keyv instances to top of files to avoid memory leaks, refactor ban logic to use getLogStores instead
refactor(getLogStores): get a single log store by type
* fix(ci): refactor tests due to banLogs changes, also make sure to clear and revoke sessions even if ban duration is 0
* fix(banViolation.js): getLogStores import
* feat: handle 500 code error at login
* fix(middleware): handle case where user.id is _id and not just id
* ci: add ban secrets for backend unit tests
* refactor: logout user upon ban
* chore: log session delete message only if deletedCount > 0
* refactor: change default ban duration (2h) and make logic more clear in JSDOC
* fix: login and registration limiters will now return rate limiting error
* fix: userId not parsable as non ObjectId string
* feat: add useTimeout hook to properly clear timeouts when invoking functions within them
refactor(AuthContext): cleanup code by using new hook and defining types in ~/common
* fix: login error message for rate limits
* docs: add info for automated mod system and rate limiters, update other docs accordingly
* chore: bump data-provider version
2023-09-13 10:57:07 -04:00
} else if ( keyExists ) {
return errorMessages [ errorKey ] ;
} else {
return defaultResponse ;
}
} ;
feat: Accurate Token Usage Tracking & Optional Balance (#1018)
* refactor(Chains/llms): allow passing callbacks
* refactor(BaseClient): accurately count completion tokens as generation only
* refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM
* wip: summary prompt tokens
* refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end
wip: draft out relevant providers and variables for token tracing
* refactor(createLLM): make streaming prop false by default
* chore: remove use of getTokenCountForResponse
* refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace
* chore: remove passing of streaming prop, also console log useful vars for tracing
* feat: formatFromLangChain helper function to count tokens for ChatModelStart
* refactor(initializeLLM): add role for LLM tracing
* chore(formatFromLangChain): update JSDoc
* feat(formatMessages): formats langChain messages into OpenAI payload format
* chore: install openai-chat-tokens
* refactor(formatMessage): optimize conditional langChain logic
fix(formatFromLangChain): fix destructuring
* feat: accurate prompt tokens for ChatModelStart before generation
* refactor(handleChatModelStart): move to callbacks dir, use factory function
* refactor(initializeLLM): rename 'role' to 'context'
* feat(Balance/Transaction): new schema/models for tracking token spend
refactor(Key): factor out model export to separate file
* refactor(initializeClient): add req,res objects to client options
* feat: add-balance script to add to an existing users' token balance
refactor(Transaction): use multiplier map/function, return balance update
* refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match
* refactor(Tx): add fair fallback value multiplier incase the config result is undefined
* refactor(Balance): rename 'tokens' to 'tokenCredits'
* feat: balance check, add tx.js for new tx-related methods and tests
* chore(summaryPrompts): update prompt token count
* refactor(callbacks): pass req, res
wip: check balance
* refactor(Tx): make convoId a String type, fix(calculateTokenValue)
* refactor(BaseClient): add conversationId as client prop when assigned
* feat(RunManager): track LLM runs with manager, track token spend from LLM,
refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls
* feat(spendTokens): helper to spend prompt/completion tokens
* feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds
refactor(Balance): static check method to return object instead of boolean now
wip(OpenAIClient): implement use of checkBalance
* refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large
refactor(OpenAIClient): add checkBalance
refactor(createStartHandler): add checkBalance
* chore: remove prompt and completion token logging from route handler
* chore(spendTokens): add JSDoc
* feat(logTokenCost): record transactions for basic api calls
* chore(ask/edit): invoke getResponseSender only once per API call
* refactor(ask/edit): pass promptTokens to getIds and include in abort data
* refactor(getIds -> getReqData): rename function
* refactor(Tx): increase value if incomplete message
* feat: record tokenUsage when message is aborted
* refactor: subtract tokens when payload includes function_call
* refactor: add namespace for token_balance
* fix(spendTokens): only execute if corresponding token type amounts are defined
* refactor(checkBalance): throws Error if not enough token credits
* refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run'
* fix(abortMiddleware): circular dependency, and default to empty string for completionTokens
* fix: properly cancel title requests when there isn't enough tokens to generate
* feat(predictNewSummary): custom chain for summaries to allow signal passing
refactor(summaryBuffer): use new custom chain
* feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError
* refactor(createStartHandler): if summary, add error details to runs
* fix(OpenAIClient): support aborting from summarization & showing error to user
refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer
* refactor(logTokenCost -> recordTokenUsage): rename
* refactor(checkBalance): include promptTokens in errorMessage
* refactor(checkBalance/spendTokens): move to models dir
* fix(createLanguageChain): correctly pass config
* refactor(initializeLLM/title): add tokenBuffer of 150 for balance check
* refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called
* refactor(createStartHandler): add error to run if context is plugins as well
* refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run
* refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic
* chore: use absolute equality for addTitle condition
* refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional
* style: icon changes to match official
* fix(BaseClient): getTokenCountForResponse -> getTokenCount
* fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc
* refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled
* fix(e2e/cleanUp): cleanup new collections, import all model methods from index
* fix(config/add-balance): add uncaughtException listener
* fix: circular dependency
* refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance
* fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped
* fix(createStartHandler): correct condition for generations
* chore: bump postcss due to moderate severity vulnerability
* chore: bump zod due to low severity vulnerability
* chore: bump openai & data-provider version
* feat(types): OpenAI Message types
* chore: update bun lockfile
* refactor(CodeBlock): add error block formatting
* refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON
* chore(logViolation): delete user_id after error is logged
* refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex
* fix(DALL-E): use latest openai SDK
* chore: reorganize imports, fix type issue
* feat(server): add balance route
* fix(api/models): add auth
* feat(data-provider): /api/balance query
* feat: show balance if checking is enabled, refetch on final message or error
* chore: update docs, .env.example with token_usage info, add balance script command
* fix(Balance): fallback to empty obj for balance query
* style: slight adjustment of balance element
* docs(token_usage): add PR notes
2023-10-05 18:34:10 -04:00
export default Error ;