LibreChat/packages/data-provider/src/parsers.ts
Danny Avila 37b50736bc
🔧 fix: Google Gemma Support & OpenAI Reasoning Instructions (#7196)
* 🔄 chore: Update @langchain/google-vertexai to version 0.2.5 in package.json and package-lock.json

* chore: temp remove agents

* 🔄 chore: Update @langchain/google-genai to version 0.2.5 in package.json and package-lock.json

* 🔄 chore: Update @langchain/community to version 0.3.42 in package.json and package-lock.json

* 🔄 chore: Add license information for @langchain/textsplitters in package-lock.json

* 🔄 chore: Update @langchain/core to version 0.3.51 in package.json and package-lock.json

* 🔄 chore: Update openai dependency to version 4.96.2 in package.json and package-lock.json

* chore: @librechat/agents to v2.4.30

* fix: streaming condition in ModelEndHandler to account for boundModel `disableStreaming` setting

* fix: update regex for noSystemModel and refactor message handling in AgentClient

* feat: Google Gemma models

* chore: remove unnecessary empty JSX fragment in PopoverButtons component
2025-05-02 15:11:50 -04:00

452 lines
13 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import dayjs from 'dayjs';
import type { ZodIssue } from 'zod';
import type * as a from './types/assistants';
import type * as s from './schemas';
import type * as t from './types';
import { ContentTypes } from './types/runs';
import {
openAISchema,
googleSchema,
EModelEndpoint,
anthropicSchema,
assistantSchema,
gptPluginsSchema,
// agentsSchema,
compactAgentsSchema,
compactGoogleSchema,
compactPluginsSchema,
compactAssistantSchema,
} from './schemas';
import { bedrockInputSchema } from './bedrock';
import { extractEnvVariable } from './utils';
import { alternateName } from './config';
type EndpointSchema =
| typeof openAISchema
| typeof googleSchema
| typeof anthropicSchema
| typeof gptPluginsSchema
| typeof assistantSchema
| typeof compactAgentsSchema
| typeof bedrockInputSchema;
export type EndpointSchemaKey = Exclude<EModelEndpoint, EModelEndpoint.chatGPTBrowser>;
const endpointSchemas: Record<EndpointSchemaKey, EndpointSchema> = {
[EModelEndpoint.openAI]: openAISchema,
[EModelEndpoint.azureOpenAI]: openAISchema,
[EModelEndpoint.custom]: openAISchema,
[EModelEndpoint.google]: googleSchema,
[EModelEndpoint.anthropic]: anthropicSchema,
[EModelEndpoint.gptPlugins]: gptPluginsSchema,
[EModelEndpoint.assistants]: assistantSchema,
[EModelEndpoint.azureAssistants]: assistantSchema,
[EModelEndpoint.agents]: compactAgentsSchema,
[EModelEndpoint.bedrock]: bedrockInputSchema,
};
// const schemaCreators: Record<EModelEndpoint, (customSchema: DefaultSchemaValues) => EndpointSchema> = {
// [EModelEndpoint.google]: createGoogleSchema,
// };
/** Get the enabled endpoints from the `ENDPOINTS` environment variable */
export function getEnabledEndpoints() {
const defaultEndpoints: string[] = [
EModelEndpoint.openAI,
EModelEndpoint.agents,
EModelEndpoint.assistants,
EModelEndpoint.azureAssistants,
EModelEndpoint.azureOpenAI,
EModelEndpoint.google,
EModelEndpoint.chatGPTBrowser,
EModelEndpoint.gptPlugins,
EModelEndpoint.anthropic,
EModelEndpoint.bedrock,
];
const endpointsEnv = process.env.ENDPOINTS ?? '';
let enabledEndpoints = defaultEndpoints;
if (endpointsEnv) {
enabledEndpoints = endpointsEnv
.split(',')
.filter((endpoint) => endpoint.trim())
.map((endpoint) => endpoint.trim());
}
return enabledEndpoints;
}
/** Orders an existing EndpointsConfig object based on enabled endpoint/custom ordering */
export function orderEndpointsConfig(endpointsConfig: t.TEndpointsConfig) {
if (!endpointsConfig) {
return {};
}
const enabledEndpoints = getEnabledEndpoints();
const endpointKeys = Object.keys(endpointsConfig);
const defaultCustomIndex = enabledEndpoints.indexOf(EModelEndpoint.custom);
return endpointKeys.reduce(
(accumulatedConfig: Record<string, t.TConfig | null | undefined>, currentEndpointKey) => {
const isCustom = !(currentEndpointKey in EModelEndpoint);
const isEnabled = enabledEndpoints.includes(currentEndpointKey);
if (!isEnabled && !isCustom) {
return accumulatedConfig;
}
const index = enabledEndpoints.indexOf(currentEndpointKey);
if (isCustom) {
accumulatedConfig[currentEndpointKey] = {
order: defaultCustomIndex >= 0 ? defaultCustomIndex : 9999,
...(endpointsConfig[currentEndpointKey] as Omit<t.TConfig, 'order'> & { order?: number }),
};
} else if (endpointsConfig[currentEndpointKey]) {
accumulatedConfig[currentEndpointKey] = {
...endpointsConfig[currentEndpointKey],
order: index,
};
}
return accumulatedConfig;
},
{},
);
}
/** Converts an array of Zod issues into a string. */
export function errorsToString(errors: ZodIssue[]) {
return errors
.map((error) => {
const field = error.path.join('.');
const message = error.message;
return `${field}: ${message}`;
})
.join(' ');
}
/** Resolves header values to env variables if detected */
export function resolveHeaders(headers: Record<string, string> | undefined) {
const resolvedHeaders = { ...(headers ?? {}) };
if (headers && typeof headers === 'object' && !Array.isArray(headers)) {
Object.keys(headers).forEach((key) => {
resolvedHeaders[key] = extractEnvVariable(headers[key]);
});
}
return resolvedHeaders;
}
export function getFirstDefinedValue(possibleValues: string[]) {
let returnValue;
for (const value of possibleValues) {
if (value) {
returnValue = value;
break;
}
}
return returnValue;
}
export function getNonEmptyValue(possibleValues: string[]) {
for (const value of possibleValues) {
if (value && value.trim() !== '') {
return value;
}
}
return undefined;
}
export type TPossibleValues = {
models: string[];
secondaryModels?: string[];
};
export const parseConvo = ({
endpoint,
endpointType,
conversation,
possibleValues,
}: {
endpoint: EndpointSchemaKey;
endpointType?: EndpointSchemaKey | null;
conversation: Partial<s.TConversation | s.TPreset> | null;
possibleValues?: TPossibleValues;
// TODO: POC for default schema
// defaultSchema?: Partial<EndpointSchema>,
}) => {
let schema = endpointSchemas[endpoint] as EndpointSchema | undefined;
if (!schema && !endpointType) {
throw new Error(`Unknown endpoint: ${endpoint}`);
} else if (!schema && endpointType) {
schema = endpointSchemas[endpointType];
}
// if (defaultSchema && schemaCreators[endpoint]) {
// schema = schemaCreators[endpoint](defaultSchema);
// }
const convo = schema?.parse(conversation) as s.TConversation | undefined;
const { models, secondaryModels } = possibleValues ?? {};
if (models && convo) {
convo.model = getFirstDefinedValue(models) ?? convo.model;
}
if (secondaryModels && convo?.agentOptions) {
convo.agentOptions.model = getFirstDefinedValue(secondaryModels) ?? convo.agentOptions.model;
}
return convo;
};
/** Match GPT followed by digit, optional decimal, and optional suffix
*
* Examples: gpt-4, gpt-4o, gpt-4.5, gpt-5a, etc. */
const extractGPTVersion = (modelStr: string): string => {
const gptMatch = modelStr.match(/gpt-(\d+(?:\.\d+)?)([a-z])?/i);
if (gptMatch) {
const version = gptMatch[1];
const suffix = gptMatch[2] || '';
return `GPT-${version}${suffix}`;
}
return '';
};
/** Match omni models (o1, o3, etc.), "o" followed by a digit, possibly with decimal */
const extractOmniVersion = (modelStr: string): string => {
const omniMatch = modelStr.match(/\bo(\d+(?:\.\d+)?)\b/i);
if (omniMatch) {
const version = omniMatch[1];
return `o${version}`;
}
return '';
};
export const getResponseSender = (endpointOption: t.TEndpointOption): string => {
const {
model: _m,
endpoint,
endpointType,
modelDisplayLabel: _mdl,
chatGptLabel: _cgl,
modelLabel: _ml,
} = endpointOption;
const model = _m ?? '';
const modelDisplayLabel = _mdl ?? '';
const chatGptLabel = _cgl ?? '';
const modelLabel = _ml ?? '';
if (
[
EModelEndpoint.openAI,
EModelEndpoint.bedrock,
EModelEndpoint.gptPlugins,
EModelEndpoint.azureOpenAI,
EModelEndpoint.chatGPTBrowser,
].includes(endpoint)
) {
if (chatGptLabel) {
return chatGptLabel;
} else if (modelLabel) {
return modelLabel;
} else if (model && extractOmniVersion(model)) {
return extractOmniVersion(model);
} else if (model && (model.includes('mistral') || model.includes('codestral'))) {
return 'Mistral';
} else if (model && model.includes('deepseek')) {
return 'Deepseek';
} else if (model && model.includes('gpt-')) {
const gptVersion = extractGPTVersion(model);
return gptVersion || 'GPT';
}
return (alternateName[endpoint] as string | undefined) ?? 'ChatGPT';
}
if (endpoint === EModelEndpoint.anthropic) {
return modelLabel || 'Claude';
}
if (endpoint === EModelEndpoint.bedrock) {
return modelLabel || alternateName[endpoint];
}
if (endpoint === EModelEndpoint.google) {
if (modelLabel) {
return modelLabel;
} else if (model && (model.includes('gemini') || model.includes('learnlm'))) {
return 'Gemini';
} else if (model?.toLowerCase().includes('gemma') === true) {
return 'Gemma';
} else if (model && model.includes('code')) {
return 'Codey';
}
return 'PaLM2';
}
if (endpoint === EModelEndpoint.custom || endpointType === EModelEndpoint.custom) {
if (modelLabel) {
return modelLabel;
} else if (chatGptLabel) {
return chatGptLabel;
} else if (model && extractOmniVersion(model)) {
return extractOmniVersion(model);
} else if (model && (model.includes('mistral') || model.includes('codestral'))) {
return 'Mistral';
} else if (model && model.includes('deepseek')) {
return 'Deepseek';
} else if (model && model.includes('gpt-')) {
const gptVersion = extractGPTVersion(model);
return gptVersion || 'GPT';
} else if (modelDisplayLabel) {
return modelDisplayLabel;
}
return 'AI';
}
return '';
};
type CompactEndpointSchema =
| typeof openAISchema
| typeof compactAssistantSchema
| typeof compactAgentsSchema
| typeof compactGoogleSchema
| typeof anthropicSchema
| typeof bedrockInputSchema
| typeof compactPluginsSchema;
const compactEndpointSchemas: Record<EndpointSchemaKey, CompactEndpointSchema> = {
[EModelEndpoint.openAI]: openAISchema,
[EModelEndpoint.azureOpenAI]: openAISchema,
[EModelEndpoint.custom]: openAISchema,
[EModelEndpoint.assistants]: compactAssistantSchema,
[EModelEndpoint.azureAssistants]: compactAssistantSchema,
[EModelEndpoint.agents]: compactAgentsSchema,
[EModelEndpoint.google]: compactGoogleSchema,
[EModelEndpoint.bedrock]: bedrockInputSchema,
[EModelEndpoint.anthropic]: anthropicSchema,
[EModelEndpoint.gptPlugins]: compactPluginsSchema,
};
export const parseCompactConvo = ({
endpoint,
endpointType,
conversation,
possibleValues,
}: {
endpoint?: EndpointSchemaKey;
endpointType?: EndpointSchemaKey | null;
conversation: Partial<s.TConversation | s.TPreset>;
possibleValues?: TPossibleValues;
// TODO: POC for default schema
// defaultSchema?: Partial<EndpointSchema>,
}) => {
if (!endpoint) {
throw new Error(`undefined endpoint: ${endpoint}`);
}
let schema = compactEndpointSchemas[endpoint] as CompactEndpointSchema | undefined;
if (!schema && !endpointType) {
throw new Error(`Unknown endpoint: ${endpoint}`);
} else if (!schema && endpointType) {
schema = compactEndpointSchemas[endpointType];
}
if (!schema) {
throw new Error(`Unknown endpointType: ${endpointType}`);
}
const convo = schema.parse(conversation) as s.TConversation | null;
// const { models, secondaryModels } = possibleValues ?? {};
const { models } = possibleValues ?? {};
if (models && convo) {
convo.model = getFirstDefinedValue(models) ?? convo.model;
}
// if (secondaryModels && convo.agentOptions) {
// convo.agentOptionmodel = getFirstDefinedValue(secondaryModels) ?? convo.agentOptionmodel;
// }
return convo;
};
export function parseTextParts(
contentParts: a.TMessageContentParts[],
skipReasoning: boolean = false,
): string {
let result = '';
for (const part of contentParts) {
if (!part.type) {
continue;
}
if (part.type === ContentTypes.TEXT) {
const textValue = typeof part.text === 'string' ? part.text : part.text.value;
if (
result.length > 0 &&
textValue.length > 0 &&
result[result.length - 1] !== ' ' &&
textValue[0] !== ' '
) {
result += ' ';
}
result += textValue;
} else if (part.type === ContentTypes.THINK && !skipReasoning) {
const textValue = typeof part.think === 'string' ? part.think : '';
if (
result.length > 0 &&
textValue.length > 0 &&
result[result.length - 1] !== ' ' &&
textValue[0] !== ' '
) {
result += ' ';
}
result += textValue;
}
}
return result;
}
export const SEPARATORS = ['.', '?', '!', '۔', '。', '‥', ';', '¡', '¿', '\n', '```'];
export function findLastSeparatorIndex(text: string, separators = SEPARATORS): number {
let lastIndex = -1;
for (const separator of separators) {
const index = text.lastIndexOf(separator);
if (index > lastIndex) {
lastIndex = index;
}
}
return lastIndex;
}
export function replaceSpecialVars({ text, user }: { text: string; user?: t.TUser | null }) {
let result = text;
if (!result) {
return result;
}
// e.g., "2024-04-29 (1)" (1=Monday)
const currentDate = dayjs().format('YYYY-MM-DD');
const dayNumber = dayjs().day();
const combinedDate = `${currentDate} (${dayNumber})`;
result = result.replace(/{{current_date}}/gi, combinedDate);
const currentDatetime = dayjs().format('YYYY-MM-DD HH:mm:ss');
result = result.replace(/{{current_datetime}}/gi, `${currentDatetime} (${dayNumber})`);
const isoDatetime = dayjs().toISOString();
result = result.replace(/{{iso_datetime}}/gi, isoDatetime);
if (user && user.name) {
result = result.replace(/{{current_user}}/gi, user.name);
}
return result;
}