🚀 feat: o1 (#4019)

* feat: o1 default response sender string

* feat: add o1 models to default openai models list, add `no_system_messages` error type; refactor: use error type as localization key

* refactor(MessageEndpointIcon): differentiate openAI icon model color for o1 models

* refactor(AnthropicClient): use new input/output tokens keys; add prompt caching for claude-3-opus

* refactor(BaseClient): to use new input/output tokens keys; update typedefs

* feat: initial o1 model handling, including token cost complexity

* EXPERIMENTAL: special handling for o1 model with custom instructions
Danny Avila 2024-09-12 18:15:43 -04:00 committed by GitHub
parent 9a393be012
commit 45b42830a5
12 changed files with 229 additions and 36 deletions
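One commit-message item above is not visible in the diff below: the AnthropicClient refactor that adds prompt caching for claude-3-opus. For context, Anthropic's prompt caching (a beta at the time) marks stable content blocks with `cache_control` and opts in via a beta header. A hedged sketch of the general request shape only, not LibreChat's actual code:

// Illustrative only: rough shape of an Anthropic prompt-caching request.
const longSystemPrompt = '...'; // imagine a large, stable instruction block worth caching

async function cachedOpusRequest() {
  const response = await fetch('https://api.anthropic.com/v1/messages', {
    method: 'POST',
    headers: {
      'x-api-key': process.env.ANTHROPIC_API_KEY,
      'anthropic-version': '2023-06-01',
      'anthropic-beta': 'prompt-caching-2024-07-31',
      'content-type': 'application/json',
    },
    body: JSON.stringify({
      model: 'claude-3-opus-20240229',
      max_tokens: 1024,
      // The cached block: marked ephemeral so repeat requests reuse the prefix.
      system: [{ type: 'text', text: longSystemPrompt, cache_control: { type: 'ephemeral' } }],
      messages: [{ role: 'user', content: 'Hello' }],
    }),
  });
  return response.json();
}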


@@ -19,6 +19,7 @@ const {
constructAzureURL,
getModelMaxTokens,
genAzureChatCompletion,
+ getModelMaxOutputTokens,
} = require('~/utils');
const {
truncateText,
@@ -64,6 +65,9 @@ class OpenAIClient extends BaseClient {
/** @type {string | undefined} - The API Completions URL */
this.completionsUrl;
+ /** @type {OpenAIUsageMetadata | undefined} */
+ this.usage;
}
// TODO: PluginsClient calls this 3x, unneeded
@@ -138,7 +142,8 @@ class OpenAIClient extends BaseClient {
const { model } = this.modelOptions;
- this.isChatCompletion = this.useOpenRouter || !!reverseProxy || model.includes('gpt');
+ this.isChatCompletion =
+ /\bo1\b/i.test(model) || model.includes('gpt') || this.useOpenRouter || !!reverseProxy;
this.isChatGptModel = this.isChatCompletion;
if (
model.includes('text-davinci') ||
@@ -169,7 +174,14 @@ class OpenAIClient extends BaseClient {
logger.debug('[OpenAIClient] maxContextTokens', this.maxContextTokens);
}
- this.maxResponseTokens = this.modelOptions.max_tokens || 1024;
+ this.maxResponseTokens =
+ this.modelOptions.max_tokens ??
+ getModelMaxOutputTokens(
+ model,
+ this.options.endpointType ?? this.options.endpoint,
+ this.options.endpointTokenConfig,
+ ) ??
+ 1024;
this.maxPromptTokens =
this.options.maxPromptTokens || this.maxContextTokens - this.maxResponseTokens;
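The hunk above changes how the response-token cap is resolved: an explicit `max_tokens` option first, then the model's known maximum output tokens, then a default of 1024. A minimal sketch of the fallback chain; the stubbed lookup and the 32768 figure are illustrative, not the real `getModelMaxOutputTokens`:

// Minimal sketch; the real helper consults per-endpoint token configs.
const getModelMaxOutputTokens = (model) =>
  /\bo1\b/i.test(model) ? 32768 : undefined; // sample cap, not authoritative

const resolveMaxResponseTokens = (modelOptions, model) =>
  modelOptions.max_tokens ?? getModelMaxOutputTokens(model) ?? 1024;

resolveMaxResponseTokens({ max_tokens: 2048 }, 'o1-preview'); // 2048: explicit option wins
resolveMaxResponseTokens({}, 'o1-preview'); // 32768: model-specific ceiling
resolveMaxResponseTokens({}, 'gpt-4'); // 1024: final fallback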
@@ -533,7 +545,8 @@
promptPrefix = this.augmentedPrompt + promptPrefix;
}
- if (promptPrefix) {
+ const isO1Model = /\bo1\b/i.test(this.modelOptions.model);
+ if (promptPrefix && !isO1Model) {
promptPrefix = `Instructions:\n${promptPrefix.trim()}`;
instructions = {
role: 'system',
@@ -561,6 +574,16 @@
messages,
};
+ /** EXPERIMENTAL */
+ if (promptPrefix && isO1Model) {
+ const lastUserMessageIndex = payload.findLastIndex((message) => message.role === 'user');
+ if (lastUserMessageIndex !== -1) {
+ payload[
+ lastUserMessageIndex
+ ].content = `${promptPrefix}\n${payload[lastUserMessageIndex].content}`;
+ }
+ }
if (tokenCountMap) {
tokenCountMap.instructions = instructions?.tokenCount;
result.tokenCountMap = tokenCountMap;
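Because o1 models rejected the `system` role at launch, the experimental block above prepends the instructions to the most recent user message instead. Its effect, with invented message contents:

const promptPrefix = 'Answer in French.';
const payload = [
  { role: 'user', content: 'Hello' },
  { role: 'assistant', content: 'Hi!' },
  { role: 'user', content: 'What time is it?' },
];

// Fold the instructions into the last user message, as the diff does.
const lastUserMessageIndex = payload.findLastIndex((m) => m.role === 'user');
if (lastUserMessageIndex !== -1) {
  payload[lastUserMessageIndex].content =
    `${promptPrefix}\n${payload[lastUserMessageIndex].content}`;
}
// payload[2].content is now: 'Answer in French.\nWhat time is it?'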
@@ -885,6 +908,56 @@ ${convo}
return title;
}
+ /**
+ * Get stream usage as returned by this client's API response.
+ * @returns {OpenAIUsageMetadata} The stream usage object.
+ */
+ getStreamUsage() {
+ if (
+ typeof this.usage === 'object' &&
+ typeof this.usage.completion_tokens_details === 'object'
+ ) {
+ const outputTokens = Math.abs(
+ this.usage.completion_tokens_details.reasoning_tokens - this.usage[this.outputTokensKey],
+ );
+ return {
+ ...this.usage.completion_tokens_details,
+ [this.inputTokensKey]: this.usage[this.inputTokensKey],
+ [this.outputTokensKey]: outputTokens,
+ };
+ }
+ return this.usage;
+ }
+ /**
+ * Calculates the correct token count for the current user message based on the token count map and API usage.
+ * Edge case: If the calculation results in a negative value, it returns the original estimate.
+ * If revisiting a conversation with a chat history entirely composed of token estimates,
+ * the cumulative token count going forward should become more accurate as the conversation progresses.
+ * @param {Object} params - The parameters for the calculation.
+ * @param {Record<string, number>} params.tokenCountMap - A map of message IDs to their token counts.
+ * @param {string} params.currentMessageId - The ID of the current message to calculate.
+ * @param {OpenAIUsageMetadata} params.usage - The usage object returned by the API.
+ * @returns {number} The correct token count for the current user message.
+ */
+ calculateCurrentTokenCount({ tokenCountMap, currentMessageId, usage }) {
+ const originalEstimate = tokenCountMap[currentMessageId] || 0;
+ if (!usage || typeof usage[this.inputTokensKey] !== 'number') {
+ return originalEstimate;
+ }
+ tokenCountMap[currentMessageId] = 0;
+ const totalTokensFromMap = Object.values(tokenCountMap).reduce((sum, count) => {
+ const numCount = Number(count);
+ return sum + (isNaN(numCount) ? 0 : numCount);
+ }, 0);
+ const totalInputTokens = usage[this.inputTokensKey] ?? 0;
+ const currentMessageTokens = totalInputTokens - totalTokensFromMap;
+ return currentMessageTokens > 0 ? currentMessageTokens : originalEstimate;
+ }
async summarizeMessages({ messagesToRefine, remainingContextTokens }) {
logger.debug('[OpenAIClient] Summarizing messages...');
let context = messagesToRefine;
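For o1, the API's `completion_tokens` total includes hidden reasoning tokens, so `getStreamUsage` splits the two, and `calculateCurrentTokenCount` reconciles local estimates against the API's authoritative input count. A worked example with invented numbers, assuming the token keys resolve to `prompt_tokens` and `completion_tokens`:

// Suppose the API reports (values invented for illustration):
const usage = {
  prompt_tokens: 900,
  completion_tokens: 500,
  completion_tokens_details: { reasoning_tokens: 448 },
};
// getStreamUsage: visible output = |448 - 500| = 52 tokens;
// the 448 reasoning tokens are surfaced separately for billing.

// calculateCurrentTokenCount: if the other messages in the map were
// estimated at 300 + 250 = 550 tokens, the current message's true size is
// 900 (API input total) - 550 (map total) = 350 tokens,
// replacing whatever the local tokenizer estimated for it.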
@@ -1000,7 +1073,16 @@ ${convo}
}
}
- async recordTokenUsage({ promptTokens, completionTokens, context = 'message' }) {
+ /**
+ * @param {object} params
+ * @param {number} params.promptTokens
+ * @param {number} params.completionTokens
+ * @param {OpenAIUsageMetadata} [params.usage]
+ * @param {string} [params.model]
+ * @param {string} [params.context='message']
+ * @returns {Promise<void>}
+ */
+ async recordTokenUsage({ promptTokens, completionTokens, usage, context = 'message' }) {
await spendTokens(
{
context,
@@ -1011,6 +1093,19 @@ ${convo}
},
{ promptTokens, completionTokens },
);
+ if (typeof usage === 'object' && typeof usage.reasoning_tokens === 'number') {
+ await spendTokens(
+ {
+ context: 'reasoning',
+ model: this.modelOptions.model,
+ conversationId: this.conversationId,
+ user: this.user ?? this.options.req.user?.id,
+ endpointTokenConfig: this.options.endpointTokenConfig,
+ },
+ { completionTokens: usage.reasoning_tokens },
+ );
+ }
}
getTokenCountForResponse(response) {
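With this change, a single o1 response can produce two spend transactions: one for the visible completion and one, under the 'reasoning' context, for the hidden reasoning tokens. Continuing the invented numbers from the earlier example, the calls look roughly like:

// First transaction: prompt plus visible completion tokens.
await spendTokens(
  { context: 'message', model: 'o1-preview' /* conversationId, user, ... */ },
  { promptTokens: 900, completionTokens: 52 },
);
// Second transaction: the reasoning tokens, recorded separately.
await spendTokens(
  { context: 'reasoning', model: 'o1-preview' /* same metadata */ },
  { completionTokens: 448 },
);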
@@ -1191,6 +1286,10 @@ ${convo}
/** @type {(value: void | PromiseLike<void>) => void} */
let streamResolve;
+ if (modelOptions.stream && /\bo1\b/i.test(modelOptions.model)) {
+ delete modelOptions.stream;
+ }
if (modelOptions.stream) {
streamPromise = new Promise((resolve) => {
streamResolve = resolve;
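o1 models did not support streaming at launch, so the guard above silently drops the flag and the request falls through to the non-streaming path. Its effect on an invented options object:

const modelOptions = { model: 'o1-preview', stream: true };
if (modelOptions.stream && /\bo1\b/i.test(modelOptions.model)) {
  delete modelOptions.stream; // o1 rejected stream: true at the time
}
// modelOptions is now { model: 'o1-preview' }: the completion arrives
// in one response rather than as server-sent events.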
@@ -1269,6 +1368,8 @@ ${convo}
}
const { choices } = chatCompletion;
+ this.usage = chatCompletion.usage;
if (!Array.isArray(choices) || choices.length === 0) {
logger.warn('[OpenAIClient] Chat completion response has no choices');
return intermediateReply.join('');