Mirror of https://github.com/danny-avila/LibreChat.git (synced 2026-01-25 03:36:12 +01:00)
🪙 feat: Configure Max Context and Output Tokens (#2648)
* chore: make frequent 'error' log into 'debug' log
* feat: add maxContextTokens as a conversation field
* refactor(settings): increase popover height
* feat: add DynamicInputNumber and maxContextTokens to all endpoints that support it (frontend), fix schema
* feat: maxContextTokens handling (backend)
* style: revert popover height
* feat: max tokens
* fix: Ollama Vision firebase compatibility
* fix: Ollama Vision, use message_file_map to determine multimodal request
* refactor: bring back MobileNav and improve title styling
Parent: 5293b73b6d
Commit: 6ba7f60eec
26 changed files with 420 additions and 22 deletions
@@ -75,7 +75,9 @@ class AnthropicClient extends BaseClient {
     this.options.attachments?.then((attachments) => this.checkVisionRequest(attachments));
 
     this.maxContextTokens =
-      getModelMaxTokens(this.modelOptions.model, EModelEndpoint.anthropic) ?? 100000;
+      this.options.maxContextTokens ??
+      getModelMaxTokens(this.modelOptions.model, EModelEndpoint.anthropic) ??
+      100000;
     this.maxResponseTokens = this.modelOptions.maxOutputTokens || 1500;
     this.maxPromptTokens =
       this.options.maxPromptTokens || this.maxContextTokens - this.maxResponseTokens;

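The fallback chain above resolves the context window in priority order: the user-configured value, then the per-model lookup, then a hard default. A minimal sketch of that precedence (the token table and helper below are illustrative stand-ins, not LibreChat code):

// Illustrative stand-in for getModelMaxTokens and its lookup table.
const modelTokenTable = { 'claude-3-opus-20240229': 200000 };

function resolveContextTokens(userValue, model) {
  // `??` only falls through on null/undefined, so an explicit user value
  // of 0 would still win; `||` would instead skip any falsy value.
  return userValue ?? modelTokenTable[model] ?? 100000;
}

console.log(resolveContextTokens(8000, 'claude-3-opus-20240229')); // 8000 (user override)
console.log(resolveContextTokens(undefined, 'claude-3-opus-20240229')); // 200000 (model lookup)
console.log(resolveContextTokens(undefined, 'unknown-model')); // 100000 (hard default)
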
@@ -652,6 +654,7 @@ class AnthropicClient extends BaseClient {
 
   getSaveOptions() {
     return {
+      maxContextTokens: this.options.maxContextTokens,
       promptPrefix: this.options.promptPrefix,
       modelLabel: this.options.modelLabel,
       resendFiles: this.options.resendFiles,

@@ -138,7 +138,10 @@ class GoogleClient extends BaseClient {
       !isGenerativeModel && !isChatModel && /code|text/.test(this.modelOptions.model);
     const { isTextModel } = this;
 
-    this.maxContextTokens = getModelMaxTokens(this.modelOptions.model, EModelEndpoint.google);
+    this.maxContextTokens =
+      this.options.maxContextTokens ??
+      getModelMaxTokens(this.modelOptions.model, EModelEndpoint.google);
+
     // The max prompt tokens is determined by the max context tokens minus the max response tokens.
     // Earlier messages will be dropped until the prompt is within the limit.
     this.maxResponseTokens = this.modelOptions.maxOutputTokens || settings.maxOutputTokens.default;

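As the in-code comment says, the prompt budget is whatever remains of the context window after reserving output room, and earlier messages are dropped until the prompt fits. A worked sketch with illustrative numbers and a hypothetical trimming loop:

// 32768-token window minus 1024 reserved for output leaves 31744 for the prompt.
const maxContextTokens = 32768;
const maxResponseTokens = 1024;
const maxPromptTokens = maxContextTokens - maxResponseTokens; // 31744

// Hypothetical trim: drop the oldest messages until the total fits the budget.
function trimToBudget(messages, countTokens, budget) {
  const kept = [...messages];
  let total = kept.reduce((sum, m) => sum + countTokens(m), 0);
  while (kept.length > 1 && total > budget) {
    total -= countTokens(kept.shift()); // shift() removes the earliest message
  }
  return kept;
}
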
@@ -161,11 +161,13 @@ class OpenAIClient extends BaseClient {
       model.startsWith('text-chat') || model.startsWith('text-davinci-002-render');
 
     this.maxContextTokens =
+      this.options.maxContextTokens ??
       getModelMaxTokens(
         model,
         this.options.endpointType ?? this.options.endpoint,
         this.options.endpointTokenConfig,
-      ) ?? 4095; // 1 less than maximum
+      ) ??
+      4095; // 1 less than maximum
 
     if (this.shouldSummarize) {
       this.maxContextTokens = Math.floor(this.maxContextTokens / 2);

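Two details in this hunk: the `4095` fallback deliberately sits one token under the classic 4096 window, and enabling summarization halves the resolved window so the summary and live turns share it. The arithmetic, with an illustrative model size:

// Illustrative: a 16385-token model with summarization enabled keeps
// floor(16385 / 2) = 8192 usable context tokens.
const resolved = 16385;
const shouldSummarize = true;
const effective = shouldSummarize ? Math.floor(resolved / 2) : resolved;
console.log(effective); // 8192
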
@@ -407,6 +409,7 @@ class OpenAIClient extends BaseClient {
 
   getSaveOptions() {
     return {
+      maxContextTokens: this.options.maxContextTokens,
       chatGptLabel: this.options.chatGptLabel,
       promptPrefix: this.options.promptPrefix,
       resendFiles: this.options.resendFiles,

@@ -435,7 +438,11 @@ class OpenAIClient extends BaseClient {
    * @returns {Promise<MongoFile[]>}
    */
   async addImageURLs(message, attachments) {
-    const { files, image_urls } = await encodeAndFormat(this.options.req, attachments);
+    const { files, image_urls } = await encodeAndFormat(
+      this.options.req,
+      attachments,
+      this.options.endpoint,
+    );
     message.image_urls = image_urls.length ? image_urls : undefined;
     return files;
   }

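The extra `this.options.endpoint` argument lets `encodeAndFormat` pick an image strategy per endpoint instead of a single global one. A hedged stub mirroring the new three-argument shape (the body is an assumption; the real service lives in the API's file-handling code):

// Stub with the same call shape as the diff; logic is illustrative only.
async function encodeAndFormat(req, attachments = [], endpoint) {
  const inline = ['google', 'anthropic', 'ollama'].includes(
    String(endpoint ?? '').toLowerCase(),
  );
  // Endpoints that cannot fetch URLs get inline data; others get a URL.
  const image_urls = attachments.map((f) => ({ filepath: f.filepath, inline }));
  return { files: attachments, image_urls };
}
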
@@ -1158,7 +1165,7 @@ ${convo}
       });
     }
 
-    if (this.options.attachments && this.options.endpoint?.toLowerCase() === 'ollama') {
+    if (this.message_file_map && this.options.endpoint?.toLowerCase() === 'ollama') {
       const ollamaClient = new OllamaClient({ baseURL });
       return await ollamaClient.chatCompletion({
         payload: modelOptions,

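The fix matters because `this.options.attachments` is only set when files arrive with the current request, while `this.message_file_map` is populated whenever any message in the payload carries files, so images resent from earlier turns still reach the Ollama vision path. A sketch of the difference (the object shapes are assumptions, not the actual client state):

// Assumed shapes: attachments covers only the current request;
// message_file_map indexes files by message id across the whole thread.
const state = {
  attachments: undefined, // nothing uploaded with *this* request
  message_file_map: { 'msg-1': [{ type: 'image/png' }] }, // image from an earlier turn
};

const oldCheck = Boolean(state.attachments); // false: misses the resent image
const newCheck = Boolean(state.message_file_map); // true: routes to Ollama vision
console.log({ oldCheck, newCheck });
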
@@ -40,7 +40,8 @@ class FakeClient extends BaseClient {
       };
     }
 
-    this.maxContextTokens = getModelMaxTokens(this.modelOptions.model) ?? 4097;
+    this.maxContextTokens =
+      this.options.maxContextTokens ?? getModelMaxTokens(this.modelOptions.model) ?? 4097;
   }
   buildMessages() {}
   getTokenCount(str) {

@@ -348,7 +348,7 @@ module.exports = function mongoMeili(schema, options) {
       try {
         meiliDoc = await client.index('convos').getDocument(doc.conversationId);
       } catch (error) {
-        logger.error(
+        logger.debug(
          '[MeiliMongooseModel.findOneAndUpdate] Convo not found in MeiliSearch and will index ' +
            doc.conversationId,
          error,

@@ -104,6 +104,12 @@ const conversationPreset = {
     type: String,
   },
   tools: { type: [{ type: String }], default: undefined },
+  maxContextTokens: {
+    type: Number,
+  },
+  max_tokens: {
+    type: Number,
+  },
 };
 
 const agentOptions = {

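With both fields on the preset schema, the context cap (`maxContextTokens`) and the output cap (`max_tokens`) persist per conversation. A standalone mongoose sketch exercising the two fields (not the app's actual model wiring):

const mongoose = require('mongoose');

// Minimal schema with just the two new numeric fields.
const presetSchema = new mongoose.Schema({
  maxContextTokens: { type: Number }, // prompt-side window cap
  max_tokens: { type: Number }, // generated-output cap
});

const Preset = mongoose.model('PresetSketch', presetSchema);
const doc = new Preset({ maxContextTokens: 8192, max_tokens: 1024 });
console.log(doc.maxContextTokens, doc.max_tokens); // 8192 1024
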
@@ -1,5 +1,14 @@
 const buildOptions = (endpoint, parsedBody) => {
-  const { modelLabel, promptPrefix, resendFiles, iconURL, greeting, spec, ...rest } = parsedBody;
+  const {
+    modelLabel,
+    promptPrefix,
+    maxContextTokens,
+    resendFiles,
+    iconURL,
+    greeting,
+    spec,
+    ...rest
+  } = parsedBody;
   const endpointOption = {
     endpoint,
     modelLabel,

@@ -8,6 +17,7 @@ const buildOptions = (endpoint, parsedBody) => {
     iconURL,
     greeting,
     spec,
+    maxContextTokens,
     modelOptions: {
       ...rest,
     },

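The same pattern repeats in every buildOptions hunk below: `maxContextTokens` is destructured by name before the `...rest` spread, so it lands on `endpointOption` itself and never leaks into the `modelOptions` forwarded to the provider. A condensed illustration with a hypothetical body:

// Named destructuring removes the key from ...rest, keeping modelOptions clean.
const parsedBody = { maxContextTokens: 8192, temperature: 0.7, model: 'gpt-4' };
const { maxContextTokens, ...rest } = parsedBody;

const endpointOption = { maxContextTokens, modelOptions: { ...rest } };
console.log(endpointOption);
// { maxContextTokens: 8192, modelOptions: { temperature: 0.7, model: 'gpt-4' } }
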
@@ -1,6 +1,15 @@
 const buildOptions = (endpoint, parsedBody, endpointType) => {
-  const { chatGptLabel, promptPrefix, resendFiles, imageDetail, iconURL, greeting, spec, ...rest } =
-    parsedBody;
+  const {
+    chatGptLabel,
+    promptPrefix,
+    maxContextTokens,
+    resendFiles,
+    imageDetail,
+    iconURL,
+    greeting,
+    spec,
+    ...rest
+  } = parsedBody;
   const endpointOption = {
     endpoint,
     endpointType,

@@ -11,6 +20,7 @@ const buildOptions = (endpoint, parsedBody, endpointType) => {
     iconURL,
     greeting,
     spec,
+    maxContextTokens,
     modelOptions: {
       ...rest,
     },

@@ -7,6 +7,7 @@ const buildOptions = (endpoint, parsedBody) => {
     iconURL,
     greeting,
     spec,
+    maxContextTokens,
     ...modelOptions
   } = parsedBody;
   const endpointOption = {

@@ -21,6 +22,7 @@ const buildOptions = (endpoint, parsedBody) => {
     iconURL,
     greeting,
     spec,
+    maxContextTokens,
     modelOptions,
   };
 

@@ -1,6 +1,15 @@
 const buildOptions = (endpoint, parsedBody) => {
-  const { chatGptLabel, promptPrefix, resendFiles, imageDetail, iconURL, greeting, spec, ...rest } =
-    parsedBody;
+  const {
+    chatGptLabel,
+    promptPrefix,
+    maxContextTokens,
+    resendFiles,
+    imageDetail,
+    iconURL,
+    greeting,
+    spec,
+    ...rest
+  } = parsedBody;
   const endpointOption = {
     endpoint,
     chatGptLabel,

@@ -10,6 +19,7 @@ const buildOptions = (endpoint, parsedBody) => {
     iconURL,
     greeting,
     spec,
+    maxContextTokens,
     modelOptions: {
       ...rest,
     },

@@ -23,7 +23,7 @@ async function fetchImageToBase64(url) {
   }
 }
 
-const base64Only = new Set([EModelEndpoint.google, EModelEndpoint.anthropic]);
+const base64Only = new Set([EModelEndpoint.google, EModelEndpoint.anthropic, 'Ollama', 'ollama']);
 
 /**
  * Encodes and formats the given files.

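Listing both `'Ollama'` and `'ollama'` covers the casing variants an endpoint name may arrive in; set membership stays an O(1) check either way. A small sketch of the lookup (string literals stand in for the EModelEndpoint enum values):

const base64Only = new Set(['google', 'anthropic', 'Ollama', 'ollama']);
const needsBase64 = (endpoint) => base64Only.has(endpoint);

console.log(needsBase64('ollama')); // true: must inline image data
console.log(needsBase64('openAI')); // false: may pass an image URL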