mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-23 11:50:14 +01:00
🚀 feat: o1 (#4019)
* feat: o1 default response sender string * feat: add o1 models to default openai models list, add `no_system_messages` error type; refactor: use error type as localization key * refactor(MessageEndpointIcon): differentiate openAI icon model color for o1 models * refactor(AnthropicClient): use new input/output tokens keys; add prompt caching for claude-3-opus * refactor(BaseClient): to use new input/output tokens keys; update typedefs * feat: initial o1 model handling, including token cost complexity * EXPERIMENTAL: special handling for o1 model with custom instructions
This commit is contained in:
parent
9a393be012
commit
45b42830a5
12 changed files with 229 additions and 36 deletions
|
|
@ -64,6 +64,12 @@ class AnthropicClient extends BaseClient {
|
|||
/** Whether or not the model supports Prompt Caching
|
||||
* @type {boolean} */
|
||||
this.supportsCacheControl;
|
||||
/** The key for the usage object's input tokens
|
||||
* @type {string} */
|
||||
this.inputTokensKey = 'input_tokens';
|
||||
/** The key for the usage object's output tokens
|
||||
* @type {string} */
|
||||
this.outputTokensKey = 'output_tokens';
|
||||
}
|
||||
|
||||
setOptions(options) {
|
||||
|
|
@ -200,7 +206,7 @@ class AnthropicClient extends BaseClient {
|
|||
}
|
||||
|
||||
/**
|
||||
* Calculates the correct token count for the current message based on the token count map and API usage.
|
||||
* Calculates the correct token count for the current user message based on the token count map and API usage.
|
||||
* Edge case: If the calculation results in a negative value, it returns the original estimate.
|
||||
* If revisiting a conversation with a chat history entirely composed of token estimates,
|
||||
* the cumulative token count going forward should become more accurate as the conversation progresses.
|
||||
|
|
@ -208,7 +214,7 @@ class AnthropicClient extends BaseClient {
|
|||
* @param {Record<string, number>} params.tokenCountMap - A map of message IDs to their token counts.
|
||||
* @param {string} params.currentMessageId - The ID of the current message to calculate.
|
||||
* @param {AnthropicStreamUsage} params.usage - The usage object returned by the API.
|
||||
* @returns {number} The correct token count for the current message.
|
||||
* @returns {number} The correct token count for the current user message.
|
||||
*/
|
||||
calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage }) {
|
||||
const originalEstimate = tokenCountMap[currentMessageId] || 0;
|
||||
|
|
@ -680,7 +686,11 @@ class AnthropicClient extends BaseClient {
|
|||
*/
|
||||
checkPromptCacheSupport(modelName) {
|
||||
const modelMatch = matchModelName(modelName, EModelEndpoint.anthropic);
|
||||
if (modelMatch === 'claude-3-5-sonnet' || modelMatch === 'claude-3-haiku') {
|
||||
if (
|
||||
modelMatch === 'claude-3-5-sonnet' ||
|
||||
modelMatch === 'claude-3-haiku' ||
|
||||
modelMatch === 'claude-3-opus'
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -42,6 +42,12 @@ class BaseClient {
|
|||
this.conversationId;
|
||||
/** @type {string} */
|
||||
this.responseMessageId;
|
||||
/** The key for the usage object's input tokens
|
||||
* @type {string} */
|
||||
this.inputTokensKey = 'prompt_tokens';
|
||||
/** The key for the usage object's output tokens
|
||||
* @type {string} */
|
||||
this.outputTokensKey = 'completion_tokens';
|
||||
}
|
||||
|
||||
setOptions() {
|
||||
|
|
@ -604,8 +610,8 @@ class BaseClient {
|
|||
* @type {StreamUsage | null} */
|
||||
const usage = this.getStreamUsage != null ? this.getStreamUsage() : null;
|
||||
|
||||
if (usage != null && Number(usage.output_tokens) > 0) {
|
||||
responseMessage.tokenCount = usage.output_tokens;
|
||||
if (usage != null && Number(usage[this.outputTokensKey]) > 0) {
|
||||
responseMessage.tokenCount = usage[this.outputTokensKey];
|
||||
completionTokens = responseMessage.tokenCount;
|
||||
await this.updateUserMessageTokenCount({ usage, tokenCountMap, userMessage, opts });
|
||||
} else {
|
||||
|
|
@ -655,7 +661,7 @@ class BaseClient {
|
|||
/** @type {boolean} */
|
||||
const shouldUpdateCount =
|
||||
this.calculateCurrentTokenCount != null &&
|
||||
Number(usage.input_tokens) > 0 &&
|
||||
Number(usage[this.inputTokensKey]) > 0 &&
|
||||
(this.options.resendFiles ||
|
||||
(!this.options.resendFiles && !this.options.attachments?.length)) &&
|
||||
!this.options.promptPrefix;
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ const {
|
|||
constructAzureURL,
|
||||
getModelMaxTokens,
|
||||
genAzureChatCompletion,
|
||||
getModelMaxOutputTokens,
|
||||
} = require('~/utils');
|
||||
const {
|
||||
truncateText,
|
||||
|
|
@ -64,6 +65,9 @@ class OpenAIClient extends BaseClient {
|
|||
|
||||
/** @type {string | undefined} - The API Completions URL */
|
||||
this.completionsUrl;
|
||||
|
||||
/** @type {OpenAIUsageMetadata | undefined} */
|
||||
this.usage;
|
||||
}
|
||||
|
||||
// TODO: PluginsClient calls this 3x, unneeded
|
||||
|
|
@ -138,7 +142,8 @@ class OpenAIClient extends BaseClient {
|
|||
|
||||
const { model } = this.modelOptions;
|
||||
|
||||
this.isChatCompletion = this.useOpenRouter || !!reverseProxy || model.includes('gpt');
|
||||
this.isChatCompletion =
|
||||
/\bo1\b/i.test(model) || model.includes('gpt') || this.useOpenRouter || !!reverseProxy;
|
||||
this.isChatGptModel = this.isChatCompletion;
|
||||
if (
|
||||
model.includes('text-davinci') ||
|
||||
|
|
@ -169,7 +174,14 @@ class OpenAIClient extends BaseClient {
|
|||
logger.debug('[OpenAIClient] maxContextTokens', this.maxContextTokens);
|
||||
}
|
||||
|
||||
this.maxResponseTokens = this.modelOptions.max_tokens || 1024;
|
||||
this.maxResponseTokens =
|
||||
this.modelOptions.max_tokens ??
|
||||
getModelMaxOutputTokens(
|
||||
model,
|
||||
this.options.endpointType ?? this.options.endpoint,
|
||||
this.options.endpointTokenConfig,
|
||||
) ??
|
||||
1024;
|
||||
this.maxPromptTokens =
|
||||
this.options.maxPromptTokens || this.maxContextTokens - this.maxResponseTokens;
|
||||
|
||||
|
|
@ -533,7 +545,8 @@ class OpenAIClient extends BaseClient {
|
|||
promptPrefix = this.augmentedPrompt + promptPrefix;
|
||||
}
|
||||
|
||||
if (promptPrefix) {
|
||||
const isO1Model = /\bo1\b/i.test(this.modelOptions.model);
|
||||
if (promptPrefix && !isO1Model) {
|
||||
promptPrefix = `Instructions:\n${promptPrefix.trim()}`;
|
||||
instructions = {
|
||||
role: 'system',
|
||||
|
|
@ -561,6 +574,16 @@ class OpenAIClient extends BaseClient {
|
|||
messages,
|
||||
};
|
||||
|
||||
/** EXPERIMENTAL */
|
||||
if (promptPrefix && isO1Model) {
|
||||
const lastUserMessageIndex = payload.findLastIndex((message) => message.role === 'user');
|
||||
if (lastUserMessageIndex !== -1) {
|
||||
payload[
|
||||
lastUserMessageIndex
|
||||
].content = `${promptPrefix}\n${payload[lastUserMessageIndex].content}`;
|
||||
}
|
||||
}
|
||||
|
||||
if (tokenCountMap) {
|
||||
tokenCountMap.instructions = instructions?.tokenCount;
|
||||
result.tokenCountMap = tokenCountMap;
|
||||
|
|
@ -885,6 +908,56 @@ ${convo}
|
|||
return title;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get stream usage as returned by this client's API response.
|
||||
* @returns {OpenAIUsageMetadata} The stream usage object.
|
||||
*/
|
||||
getStreamUsage() {
|
||||
if (
|
||||
typeof this.usage === 'object' &&
|
||||
typeof this.usage.completion_tokens_details === 'object'
|
||||
) {
|
||||
const outputTokens = Math.abs(
|
||||
this.usage.completion_tokens_details.reasoning_tokens - this.usage[this.outputTokensKey],
|
||||
);
|
||||
return {
|
||||
...this.usage.completion_tokens_details,
|
||||
[this.inputTokensKey]: this.usage[this.inputTokensKey],
|
||||
[this.outputTokensKey]: outputTokens,
|
||||
};
|
||||
}
|
||||
return this.usage;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the correct token count for the current user message based on the token count map and API usage.
|
||||
* Edge case: If the calculation results in a negative value, it returns the original estimate.
|
||||
* If revisiting a conversation with a chat history entirely composed of token estimates,
|
||||
* the cumulative token count going forward should become more accurate as the conversation progresses.
|
||||
* @param {Object} params - The parameters for the calculation.
|
||||
* @param {Record<string, number>} params.tokenCountMap - A map of message IDs to their token counts.
|
||||
* @param {string} params.currentMessageId - The ID of the current message to calculate.
|
||||
* @param {OpenAIUsageMetadata} params.usage - The usage object returned by the API.
|
||||
* @returns {number} The correct token count for the current user message.
|
||||
*/
|
||||
calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage }) {
|
||||
const originalEstimate = tokenCountMap[currentMessageId] || 0;
|
||||
|
||||
if (!usage || typeof usage[this.inputTokensKey] !== 'number') {
|
||||
return originalEstimate;
|
||||
}
|
||||
|
||||
tokenCountMap[currentMessageId] = 0;
|
||||
const totalTokensFromMap = Object.values(tokenCountMap).reduce((sum, count) => {
|
||||
const numCount = Number(count);
|
||||
return sum + (isNaN(numCount) ? 0 : numCount);
|
||||
}, 0);
|
||||
const totalInputTokens = usage[this.inputTokensKey] ?? 0;
|
||||
|
||||
const currentMessageTokens = totalInputTokens - totalTokensFromMap;
|
||||
return currentMessageTokens > 0 ? currentMessageTokens : originalEstimate;
|
||||
}
|
||||
|
||||
async summarizeMessages({ messagesToRefine, remainingContextTokens }) {
|
||||
logger.debug('[OpenAIClient] Summarizing messages...');
|
||||
let context = messagesToRefine;
|
||||
|
|
@ -1000,7 +1073,16 @@ ${convo}
|
|||
}
|
||||
}
|
||||
|
||||
async recordTokenUsage({ promptTokens, completionTokens, context = 'message' }) {
|
||||
/**
|
||||
* @param {object} params
|
||||
* @param {number} params.promptTokens
|
||||
* @param {number} params.completionTokens
|
||||
* @param {OpenAIUsageMetadata} [params.usage]
|
||||
* @param {string} [params.model]
|
||||
* @param {string} [params.context='message']
|
||||
* @returns {Promise<void>}
|
||||
*/
|
||||
async recordTokenUsage({ promptTokens, completionTokens, usage, context = 'message' }) {
|
||||
await spendTokens(
|
||||
{
|
||||
context,
|
||||
|
|
@ -1011,6 +1093,19 @@ ${convo}
|
|||
},
|
||||
{ promptTokens, completionTokens },
|
||||
);
|
||||
|
||||
if (typeof usage === 'object' && typeof usage.reasoning_tokens === 'number') {
|
||||
await spendTokens(
|
||||
{
|
||||
context: 'reasoning',
|
||||
model: this.modelOptions.model,
|
||||
conversationId: this.conversationId,
|
||||
user: this.user ?? this.options.req.user?.id,
|
||||
endpointTokenConfig: this.options.endpointTokenConfig,
|
||||
},
|
||||
{ completionTokens: usage.reasoning_tokens },
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
getTokenCountForResponse(response) {
|
||||
|
|
@ -1191,6 +1286,10 @@ ${convo}
|
|||
/** @type {(value: void | PromiseLike<void>) => void} */
|
||||
let streamResolve;
|
||||
|
||||
if (modelOptions.stream && /\bo1\b/i.test(modelOptions.model)) {
|
||||
delete modelOptions.stream;
|
||||
}
|
||||
|
||||
if (modelOptions.stream) {
|
||||
streamPromise = new Promise((resolve) => {
|
||||
streamResolve = resolve;
|
||||
|
|
@ -1269,6 +1368,8 @@ ${convo}
|
|||
}
|
||||
|
||||
const { choices } = chatCompletion;
|
||||
this.usage = chatCompletion.usage;
|
||||
|
||||
if (!Array.isArray(choices) || choices.length === 0) {
|
||||
logger.warn('[OpenAIClient] Chat completion response has no choices');
|
||||
return intermediateReply.join('');
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue