feat: Accurate Token Usage Tracking & Optional Balance (#1018)

* refactor(Chains/llms): allow passing callbacks

* refactor(BaseClient): accurately count completion tokens as generation only

* refactor(OpenAIClient): remove unused getTokenCountForResponse, pass streaming var and callbacks in initializeLLM

* wip: summary prompt tokens

* refactor(summarizeMessages): new cut-off strategy that generates a better summary by adding context from beginning, truncating the middle, and providing the end
wip: draft out relevant providers and variables for token tracing

* refactor(createLLM): make streaming prop false by default

* chore: remove use of getTokenCountForResponse

* refactor(agents): use BufferMemory as ConversationSummaryBufferMemory token usage not easy to trace

* chore: remove passing of streaming prop, also console log useful vars for tracing

* feat: formatFromLangChain helper function to count tokens for ChatModelStart

* refactor(initializeLLM): add role for LLM tracing

* chore(formatFromLangChain): update JSDoc

* feat(formatMessages): formats langChain messages into OpenAI payload format

* chore: install openai-chat-tokens

* refactor(formatMessage): optimize conditional langChain logic
fix(formatFromLangChain): fix destructuring

* feat: accurate prompt tokens for ChatModelStart before generation

* refactor(handleChatModelStart): move to callbacks dir, use factory function

* refactor(initializeLLM): rename 'role' to 'context'

* feat(Balance/Transaction): new schema/models for tracking token spend
refactor(Key): factor out model export to separate file

* refactor(initializeClient): add req,res objects to client options

* feat: add-balance script to add to an existing user's token balance
refactor(Transaction): use multiplier map/function, return balance update

* refactor(Tx): update enum for tokenType, return 1 for multiplier if no map match

* refactor(Tx): add fair fallback value multiplier in case the config result is undefined

* refactor(Balance): rename 'tokens' to 'tokenCredits'

* feat: balance check, add tx.js for new tx-related methods and tests

* chore(summaryPrompts): update prompt token count

* refactor(callbacks): pass req, res
wip: check balance

* refactor(Tx): make convoId a String type, fix(calculateTokenValue)

* refactor(BaseClient): add conversationId as client prop when assigned

* feat(RunManager): track LLM runs with manager, track token spend from LLM,
refactor(OpenAIClient): use RunManager to create callbacks, pass user prop to langchain api calls

* feat(spendTokens): helper to spend prompt/completion tokens

* feat(checkBalance): add helper to check, log, deny request if balance doesn't have enough funds
refactor(Balance): static check method to return object instead of boolean now
wip(OpenAIClient): implement use of checkBalance

* refactor(initializeLLM): add token buffer to assure summary isn't generated when subsequent payload is too large
refactor(OpenAIClient): add checkBalance
refactor(createStartHandler): add checkBalance

* chore: remove prompt and completion token logging from route handler

* chore(spendTokens): add JSDoc

* feat(logTokenCost): record transactions for basic api calls

* chore(ask/edit): invoke getResponseSender only once per API call

* refactor(ask/edit): pass promptTokens to getIds and include in abort data

* refactor(getIds -> getReqData): rename function

* refactor(Tx): increase value if incomplete message

* feat: record tokenUsage when message is aborted

* refactor: subtract tokens when payload includes function_call

* refactor: add namespace for token_balance

* fix(spendTokens): only execute if corresponding token type amounts are defined

* refactor(checkBalance): throws Error if not enough token credits

* refactor(runTitleChain): pass and use signal, spread object props in create helpers, and use 'call' instead of 'run'

* fix(abortMiddleware): circular dependency, and default to empty string for completionTokens

* fix: properly cancel title requests when there aren't enough tokens to generate

* feat(predictNewSummary): custom chain for summaries to allow signal passing
refactor(summaryBuffer): use new custom chain

* feat(RunManager): add getRunByConversationId method, refactor: remove run and throw llm error on handleLLMError

* refactor(createStartHandler): if summary, add error details to runs

* fix(OpenAIClient): support aborting from summarization & showing error to user
refactor(summarizeMessages): remove unnecessary operations counting summaryPromptTokens and note for alternative, pass signal to summaryBuffer

* refactor(logTokenCost -> recordTokenUsage): rename

* refactor(checkBalance): include promptTokens in errorMessage

* refactor(checkBalance/spendTokens): move to models dir

* fix(createLanguageChain): correctly pass config

* refactor(initializeLLM/title): add tokenBuffer of 150 for balance check

* refactor(openAPIPlugin): pass signal and memory, filter functions by the one being called

* refactor(createStartHandler): add error to run if context is plugins as well

* refactor(RunManager/handleLLMError): throw error immediately if plugins, don't remove run

* refactor(PluginsClient): pass memory and signal to tools, cleanup error handling logic

* chore: use absolute equality for addTitle condition

* refactor(checkBalance): move checkBalance to execute after userMessage and tokenCounts are saved, also make conditional

* style: icon changes to match official

* fix(BaseClient): getTokenCountForResponse -> getTokenCount

* fix(formatLangChainMessages): add kwargs as fallback prop from lc_kwargs, update JSDoc

* refactor(Tx.create): does not update balance if CHECK_BALANCE is not enabled

* fix(e2e/cleanUp): cleanup new collections, import all model methods from index

* fix(config/add-balance): add uncaughtException listener

* fix: circular dependency

* refactor(initializeLLM/checkBalance): append new generations to errorMessage if cost exceeds balance

* fix(handleResponseMessage): only record token usage in this method if not error and completion is not skipped

* fix(createStartHandler): correct condition for generations

* chore: bump postcss due to moderate severity vulnerability

* chore: bump zod due to low severity vulnerability

* chore: bump openai & data-provider version

* feat(types): OpenAI Message types

* chore: update bun lockfile

* refactor(CodeBlock): add error block formatting

* refactor(utils/Plugin): factor out formatJSON and cn to separate files (json.ts and cn.ts), add extractJSON

* chore(logViolation): delete user_id after error is logged

* refactor(getMessageError -> Error): change to React.FC, add token_balance handling, use extractJSON to determine JSON instead of regex

* fix(DALL-E): use latest openai SDK

* chore: reorganize imports, fix type issue

* feat(server): add balance route

* fix(api/models): add auth

* feat(data-provider): /api/balance query

* feat: show balance if checking is enabled, refetch on final message or error

* chore: update docs, .env.example with token_usage info, add balance script command

* fix(Balance): fallback to empty obj for balance query

* style: slight adjustment of balance element

* docs(token_usage): add PR notes
This commit is contained in:
Danny Avila 2023-10-05 18:34:10 -04:00 committed by GitHub
parent be71a1947b
commit 365c39c405
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
81 changed files with 1606 additions and 293 deletions

View file

@ -34,12 +34,12 @@ const Icon: React.FC<IconProps> = (props) => {
} else {
const endpointIcons = {
azureOpenAI: {
icon: <AzureMinimalIcon size={size * 0.55} />,
icon: <AzureMinimalIcon size={size * 0.5555555555555556} />,
bg: 'linear-gradient(0.375turn, #61bde2, #4389d0)',
name: 'ChatGPT',
},
openAI: {
icon: <GPTIcon size={size * 0.55} />,
icon: <GPTIcon size={size * 0.5555555555555556} />,
bg:
typeof model === 'string' && model.toLowerCase().includes('gpt-4')
? '#AB68FF'
@ -52,7 +52,11 @@ const Icon: React.FC<IconProps> = (props) => {
name: 'Plugins',
},
google: { icon: <img src="/assets/google-palm.svg" alt="Palm Icon" />, name: 'PaLM2' },
anthropic: { icon: <AnthropicIcon size={size * 0.55} />, bg: '#d09a74', name: 'Claude' },
anthropic: {
icon: <AnthropicIcon size={size * 0.5555555555555556} />,
bg: '#d09a74',
name: 'Claude',
},
bingAI: {
icon: jailbreak ? (
<img src="/assets/bingai-jb.png" alt="Bing Icon" />
@ -62,7 +66,7 @@ const Icon: React.FC<IconProps> = (props) => {
name: jailbreak ? 'Sydney' : 'BingAI',
},
chatGPTBrowser: {
icon: <GPTIcon size={size * 0.55} />,
icon: <GPTIcon size={size * 0.5555555555555556} />,
bg:
typeof model === 'string' && model.toLowerCase().includes('gpt-4')
? '#AB68FF'

View file

@ -1,16 +1,23 @@
import React, { useRef, useState, RefObject } from 'react';
import copy from 'copy-to-clipboard';
import { Clipboard, CheckMark } from '~/components';
import { InfoIcon } from 'lucide-react';
import { cn } from '~/utils/';
import React, { useRef, useState, RefObject } from 'react';
import Clipboard from '~/components/svg/Clipboard';
import CheckMark from '~/components/svg/CheckMark';
import cn from '~/utils/cn';
interface CodeBarProps {
type CodeBarProps = {
lang: string;
codeRef: RefObject<HTMLElement>;
plugin?: boolean;
}
error?: boolean;
};
const CodeBar: React.FC<CodeBarProps> = React.memo(({ lang, codeRef, plugin = null }) => {
type CodeBlockProps = Pick<CodeBarProps, 'lang' | 'plugin' | 'error'> & {
codeChildren: React.ReactNode;
classProp?: string;
};
const CodeBar: React.FC<CodeBarProps> = React.memo(({ lang, codeRef, error, plugin = null }) => {
const [isCopied, setIsCopied] = useState(false);
return (
<div className="relative flex items-center rounded-tl-md rounded-tr-md bg-gray-800 px-4 py-2 font-sans text-xs text-gray-200">
@ -19,7 +26,7 @@ const CodeBar: React.FC<CodeBarProps> = React.memo(({ lang, codeRef, plugin = nu
<InfoIcon className="ml-auto flex h-4 w-4 gap-2 text-white/50" />
) : (
<button
className="ml-auto flex gap-2"
className={cn('ml-auto flex gap-2', error ? 'h-4 w-4 items-start text-white/50' : '')}
onClick={async () => {
const codeString = codeRef.current?.textContent;
if (codeString) {
@ -35,12 +42,12 @@ const CodeBar: React.FC<CodeBarProps> = React.memo(({ lang, codeRef, plugin = nu
{isCopied ? (
<>
<CheckMark />
Copied!
{error ? '' : 'Copied!'}
</>
) : (
<>
<Clipboard />
Copy code
{error ? '' : 'Copy code'}
</>
)}
</button>
@ -49,30 +56,24 @@ const CodeBar: React.FC<CodeBarProps> = React.memo(({ lang, codeRef, plugin = nu
);
});
interface CodeBlockProps {
lang: string;
codeChildren: React.ReactNode;
classProp?: string;
plugin?: boolean;
}
const CodeBlock: React.FC<CodeBlockProps> = ({
lang,
codeChildren,
classProp = '',
plugin = null,
error,
}) => {
const codeRef = useRef<HTMLElement>(null);
const language = plugin ? 'json' : lang;
const language = plugin || error ? 'json' : lang;
return (
<div className="w-full rounded-md bg-black text-xs text-white/80">
<CodeBar lang={lang} codeRef={codeRef} plugin={!!plugin} />
<CodeBar lang={lang} codeRef={codeRef} plugin={!!plugin} error={error} />
<div className={cn(classProp, 'overflow-y-auto p-4')}>
<code
ref={codeRef}
className={cn(
plugin ? '!whitespace-pre-wrap' : `hljs language-${language} !whitespace-pre`,
plugin || error ? '!whitespace-pre-wrap' : `hljs language-${language} !whitespace-pre`,
)}
>
{codeChildren}

View file

@ -1,7 +1,13 @@
import React from 'react';
import type { TOpenAIMessage } from 'librechat-data-provider';
import { formatJSON, extractJson } from '~/utils/json';
import CodeBlock from './CodeBlock';
const isJson = (str: string) => {
try {
JSON.parse(str);
} catch (e) {
console.error(e);
return false;
}
return true;
@ -16,6 +22,17 @@ type TMessageLimit = {
windowInMinutes: number;
};
type TTokenBalance = {
type: 'token_balance';
balance: number;
tokenCost: number;
promptTokens: number;
prev_count: number;
violation_count: number;
date: Date;
generations?: TOpenAIMessage[];
};
const errorMessages = {
ban: 'Your account has been temporarily banned due to violations of our service.',
invalid_api_key:
@ -34,12 +51,33 @@ const errorMessages = {
windowInMinutes > 1 ? `${windowInMinutes} minutes` : 'minute'
}.`;
},
token_balance: (json: TTokenBalance) => {
const { balance, tokenCost, promptTokens, generations } = json;
const message = `Insufficient Funds! Balance: ${balance}. Prompt tokens: ${promptTokens}. Cost: ${tokenCost}.`;
return (
<>
{message}
{generations && (
<>
<br />
<br />
</>
)}
{generations && (
<CodeBlock
lang="Generations"
error={true}
codeChildren={formatJSON(JSON.stringify(generations))}
/>
)}
</>
);
},
};
const getMessageError = (text: string) => {
const errorMessage = text.length > 512 ? text.slice(0, 512) + '...' : text;
const match = text.match(/\{[^{}]*\}/);
const jsonString = match ? match[0] : '';
const Error = ({ text }: { text: string }) => {
const jsonString = extractJson(text);
const errorMessage = text.length > 512 && !jsonString ? text.slice(0, 512) + '...' : text;
const defaultResponse = `Something went wrong. Here's the specific error message we encountered: ${errorMessage}`;
if (!isJson(jsonString)) {
@ -59,4 +97,4 @@ const getMessageError = (text: string) => {
}
};
export default getMessageError;
export default Error;

View file

@ -2,11 +2,12 @@ import { Fragment } from 'react';
import type { TResPlugin } from 'librechat-data-provider';
import type { TMessageContent, TText, TDisplayProps } from '~/common';
import { useAuthContext } from '~/hooks';
import { cn, getMessageError } from '~/utils';
import { cn } from '~/utils';
import EditMessage from './EditMessage';
import Container from './Container';
import Markdown from './Markdown';
import Plugin from './Plugin';
import Error from './Error';
const ErrorMessage = ({ text }: TText) => {
const { logout } = useAuthContext();
@ -18,7 +19,7 @@ const ErrorMessage = ({ text }: TText) => {
return (
<Container>
<div className="rounded-md border border-red-500 bg-red-500/10 px-3 py-2 text-sm text-gray-600 dark:text-gray-100">
{getMessageError(text)}
<Error text={text} />
</div>
</Container>
);

View file

@ -1,11 +1,11 @@
import { useRecoilValue } from 'recoil';
import { Disclosure } from '@headlessui/react';
import { useCallback, memo, ReactNode } from 'react';
import type { TResPlugin, TInput } from 'librechat-data-provider';
import { ChevronDownIcon, LucideProps } from 'lucide-react';
import { Disclosure } from '@headlessui/react';
import { useRecoilValue } from 'recoil';
import { cn, formatJSON } from '~/utils';
import { Spinner } from '~/components';
import CodeBlock from './CodeBlock';
import { cn } from '~/utils/';
import store from '~/store';
type PluginsMap = {
@ -16,14 +16,6 @@ type PluginIconProps = LucideProps & {
className?: string;
};
function formatJSON(json: string) {
try {
return JSON.stringify(JSON.parse(json), null, 2);
} catch (e) {
return json;
}
}
function formatInputs(inputs: TInput[]) {
let output = '';

View file

@ -94,7 +94,7 @@ export default function Message({
...conversation,
...message,
model: message?.model ?? conversation?.model,
size: 38,
size: 36,
});
if (message?.bg && searchResult) {

View file

@ -1,27 +1,31 @@
import { Download } from 'lucide-react';
import { useRecoilValue } from 'recoil';
import { Fragment, useState } from 'react';
import { useGetUserBalance, useGetStartupConfig } from 'librechat-data-provider';
import type { TConversation } from 'librechat-data-provider';
import { Menu, Transition } from '@headlessui/react';
import { ExportModel } from './ExportConversation';
import ClearConvos from './ClearConvos';
import Settings from './Settings';
import NavLink from './NavLink';
import Logout from './Logout';
import { ExportModel } from './ExportConversation';
import { LinkIcon, DotsIcon, GearIcon } from '~/components';
import { useLocalize } from '~/hooks';
import { useAuthContext } from '~/hooks/AuthContext';
import { useLocalize } from '~/hooks';
import { cn } from '~/utils/';
import store from '~/store';
export default function NavLinks() {
const balanceQuery = useGetUserBalance();
const { data: startupConfig } = useGetStartupConfig();
const [showExports, setShowExports] = useState(false);
const [showClearConvos, setShowClearConvos] = useState(false);
const [showSettings, setShowSettings] = useState(false);
const { user } = useAuthContext();
const localize = useLocalize();
const conversation = useRecoilValue(store.conversation) || {};
const conversation = useRecoilValue(store.conversation) ?? ({} as TConversation);
const exportable =
conversation?.conversationId &&
@ -39,6 +43,11 @@ export default function NavLinks() {
<Menu as="div" className="group relative">
{({ open }) => (
<>
{startupConfig?.checkBalance && balanceQuery.data && (
<div className="m-1 ml-3 whitespace-nowrap text-left text-sm text-gray-100">
{`Balance: ${balanceQuery.data}`}
</div>
)}
<Menu.Button
className={cn(
'group-ui-open:bg-gray-800 flex w-full items-center gap-2.5 rounded-md px-3 py-3 text-sm transition-colors duration-200 hover:bg-gray-800',

View file

@ -1,7 +1,14 @@
import { useEffect } from 'react';
import { useResetRecoilState, useSetRecoilState } from 'recoil';
/* @ts-ignore */
import { SSE, createPayload, tMessageSchema, tConversationSchema } from 'librechat-data-provider';
import {
/* @ts-ignore */
SSE,
createPayload,
useGetUserBalance,
tMessageSchema,
tConversationSchema,
useGetStartupConfig,
} from 'librechat-data-provider';
import type { TResPlugin, TMessage, TConversation, TSubmission } from 'librechat-data-provider';
import useConversations from './useConversations';
import { useAuthContext } from './AuthContext';
@ -24,7 +31,9 @@ export default function useServerStream(submission: TSubmission | null) {
const resetLatestMessage = useResetRecoilState(store.latestMessage);
const { token } = useAuthContext();
const { data: startupConfig } = useGetStartupConfig();
const { refreshConversations } = useConversations();
const balanceQuery = useGetUserBalance();
const messageHandler = (data: string, submission: TSubmission) => {
const {
@ -228,6 +237,7 @@ export default function useServerStream(submission: TSubmission | null) {
if (data.final) {
const { plugins } = data;
finalHandler(data, { ...submission, plugins, message });
startupConfig?.checkBalance && balanceQuery.refetch();
console.log('final', data);
}
if (data.created) {
@ -253,6 +263,7 @@ export default function useServerStream(submission: TSubmission | null) {
events.onerror = function (e: MessageEvent) {
console.log('error in opening conn.');
startupConfig?.checkBalance && balanceQuery.refetch();
events.close();
const data = JSON.parse(e.data);

6
client/src/utils/cn.ts Normal file
View file

@ -0,0 +1,6 @@
import { twMerge } from 'tailwind-merge';
import { clsx } from 'clsx';
export default function cn(...inputs: string[]) {
return twMerge(clsx(inputs));
}

View file

@ -1,20 +1,14 @@
import { clsx } from 'clsx';
import { twMerge } from 'tailwind-merge';
export * from './json';
export * from './languages';
export { default as cn } from './cn';
export { default as buildTree } from './buildTree';
export { default as getLoginError } from './getLoginError';
export { default as cleanupPreset } from './cleanupPreset';
export { default as validateIframe } from './validateIframe';
export { default as getMessageError } from './getMessageError';
export { default as buildDefaultConvo } from './buildDefaultConvo';
export { default as getDefaultEndpoint } from './getDefaultEndpoint';
export { default as getLocalStorageItems } from './getLocalStorageItems';
export function cn(...inputs: string[]) {
return twMerge(clsx(inputs));
}
export const languages = [
'java',
'c',

28
client/src/utils/json.ts Normal file
View file

@ -0,0 +1,28 @@
export function formatJSON(json: string) {
try {
return JSON.stringify(JSON.parse(json), null, 2);
} catch (e) {
return json;
}
}
export function extractJson(text: string) {
let openBraces = 0;
let startIndex = -1;
for (let i = 0; i < text.length; i++) {
if (text[i] === '{') {
if (openBraces === 0) {
startIndex = i;
}
openBraces++;
} else if (text[i] === '}') {
openBraces--;
if (openBraces === 0 && startIndex !== -1) {
return text.slice(startIndex, i + 1);
}
}
}
return '';
}