mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-17 00:40:14 +01:00
🖼️ fix: Extract File Context & Persist Attachments (#10069)
- problem: `addImageURLs` had a side effect that was previously relied on to populate both the `ocr` message field (now `fileContext`) and `client.options.attachments`; the latter is what records the user's uploaded attachments on the user message when it is saved to the database and returned at the end of the request lifecycle.
- solution: created dedicated handling for file context (`extractFileContext` / `addFileContextToMessage`), and made sure `allFiles` is also populated with non-provider attachments.
This commit is contained in:
parent
fbe341a171
commit
07d0abc9fd
5 changed files with 128 additions and 50 deletions
|
|
@ -3,6 +3,7 @@ const fetch = require('node-fetch');
|
||||||
const { logger } = require('@librechat/data-schemas');
|
const { logger } = require('@librechat/data-schemas');
|
||||||
const {
|
const {
|
||||||
getBalanceConfig,
|
getBalanceConfig,
|
||||||
|
extractFileContext,
|
||||||
encodeAndFormatAudios,
|
encodeAndFormatAudios,
|
||||||
encodeAndFormatVideos,
|
encodeAndFormatVideos,
|
||||||
encodeAndFormatDocuments,
|
encodeAndFormatDocuments,
|
||||||
|
|
@ -10,6 +11,7 @@ const {
|
||||||
const {
|
const {
|
||||||
Constants,
|
Constants,
|
||||||
ErrorTypes,
|
ErrorTypes,
|
||||||
|
FileSources,
|
||||||
ContentTypes,
|
ContentTypes,
|
||||||
excludedKeys,
|
excludedKeys,
|
||||||
EModelEndpoint,
|
EModelEndpoint,
|
||||||
|
|
@ -21,6 +23,7 @@ const { getMessages, saveMessage, updateMessage, saveConvo, getConvo } = require
|
||||||
const { getStrategyFunctions } = require('~/server/services/Files/strategies');
|
const { getStrategyFunctions } = require('~/server/services/Files/strategies');
|
||||||
const { checkBalance } = require('~/models/balanceMethods');
|
const { checkBalance } = require('~/models/balanceMethods');
|
||||||
const { truncateToolCallOutputs } = require('./prompts');
|
const { truncateToolCallOutputs } = require('./prompts');
|
||||||
|
const countTokens = require('~/server/utils/countTokens');
|
||||||
const { getFiles } = require('~/models/File');
|
const { getFiles } = require('~/models/File');
|
||||||
const TextStream = require('./TextStream');
|
const TextStream = require('./TextStream');
|
||||||
|
|
||||||
|
|
@ -1245,27 +1248,62 @@ class BaseClient {
|
||||||
return audioResult.files;
|
return audioResult.files;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extracts text context from attachments and sets it on the message.
|
||||||
|
* This handles text that was already extracted from files (OCR, transcriptions, document text, etc.)
|
||||||
|
* @param {TMessage} message - The message to add context to
|
||||||
|
* @param {MongoFile[]} attachments - Array of file attachments
|
||||||
|
* @returns {Promise<void>}
|
||||||
|
*/
|
||||||
|
async addFileContextToMessage(message, attachments) {
|
||||||
|
const fileContext = await extractFileContext({
|
||||||
|
attachments,
|
||||||
|
req: this.options?.req,
|
||||||
|
tokenCountFn: (text) => countTokens(text),
|
||||||
|
});
|
||||||
|
|
||||||
|
if (fileContext) {
|
||||||
|
message.fileContext = fileContext;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
async processAttachments(message, attachments) {
|
async processAttachments(message, attachments) {
|
||||||
const categorizedAttachments = {
|
const categorizedAttachments = {
|
||||||
images: [],
|
images: [],
|
||||||
documents: [],
|
|
||||||
videos: [],
|
videos: [],
|
||||||
audios: [],
|
audios: [],
|
||||||
|
documents: [],
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const allFiles = [];
|
||||||
|
|
||||||
for (const file of attachments) {
|
for (const file of attachments) {
|
||||||
|
/** @type {FileSources} */
|
||||||
|
const source = file.source ?? FileSources.local;
|
||||||
|
if (source === FileSources.text) {
|
||||||
|
allFiles.push(file);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (file.embedded === true || file.metadata?.fileIdentifier != null) {
|
||||||
|
allFiles.push(file);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (file.type.startsWith('image/')) {
|
if (file.type.startsWith('image/')) {
|
||||||
categorizedAttachments.images.push(file);
|
categorizedAttachments.images.push(file);
|
||||||
} else if (file.type === 'application/pdf') {
|
} else if (file.type === 'application/pdf') {
|
||||||
categorizedAttachments.documents.push(file);
|
categorizedAttachments.documents.push(file);
|
||||||
|
allFiles.push(file);
|
||||||
} else if (file.type.startsWith('video/')) {
|
} else if (file.type.startsWith('video/')) {
|
||||||
categorizedAttachments.videos.push(file);
|
categorizedAttachments.videos.push(file);
|
||||||
|
allFiles.push(file);
|
||||||
} else if (file.type.startsWith('audio/')) {
|
} else if (file.type.startsWith('audio/')) {
|
||||||
categorizedAttachments.audios.push(file);
|
categorizedAttachments.audios.push(file);
|
||||||
|
allFiles.push(file);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
const [imageFiles, documentFiles, videoFiles, audioFiles] = await Promise.all([
|
const [imageFiles] = await Promise.all([
|
||||||
categorizedAttachments.images.length > 0
|
categorizedAttachments.images.length > 0
|
||||||
? this.addImageURLs(message, categorizedAttachments.images)
|
? this.addImageURLs(message, categorizedAttachments.images)
|
||||||
: Promise.resolve([]),
|
: Promise.resolve([]),
|
||||||
|
|
@ -1280,7 +1318,8 @@ class BaseClient {
|
||||||
: Promise.resolve([]),
|
: Promise.resolve([]),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
const allFiles = [...imageFiles, ...documentFiles, ...videoFiles, ...audioFiles];
|
allFiles.push(...imageFiles);
|
||||||
|
|
||||||
const seenFileIds = new Set();
|
const seenFileIds = new Set();
|
||||||
const uniqueFiles = [];
|
const uniqueFiles = [];
|
||||||
|
|
||||||
|
|
@ -1345,6 +1384,7 @@ class BaseClient {
|
||||||
{},
|
{},
|
||||||
);
|
);
|
||||||
|
|
||||||
|
await this.addFileContextToMessage(message, files);
|
||||||
await this.processAttachments(message, files);
|
await this.processAttachments(message, files);
|
||||||
|
|
||||||
this.message_file_map[message.messageId] = files;
|
this.message_file_map[message.messageId] = files;
|
||||||
|
|
|
||||||
|
|
@ -211,16 +211,13 @@ class AgentClient extends BaseClient {
|
||||||
* @returns {Promise<Array<Partial<MongoFile>>>}
|
* @returns {Promise<Array<Partial<MongoFile>>>}
|
||||||
*/
|
*/
|
||||||
async addImageURLs(message, attachments) {
|
async addImageURLs(message, attachments) {
|
||||||
const { files, text, image_urls } = await encodeAndFormat(
|
const { files, image_urls } = await encodeAndFormat(
|
||||||
this.options.req,
|
this.options.req,
|
||||||
attachments,
|
attachments,
|
||||||
this.options.agent.provider,
|
this.options.agent.provider,
|
||||||
VisionModes.agents,
|
VisionModes.agents,
|
||||||
);
|
);
|
||||||
message.image_urls = image_urls.length ? image_urls : undefined;
|
message.image_urls = image_urls.length ? image_urls : undefined;
|
||||||
if (text && text.length) {
|
|
||||||
message.ocr = text;
|
|
||||||
}
|
|
||||||
return files;
|
return files;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -248,19 +245,18 @@ class AgentClient extends BaseClient {
|
||||||
|
|
||||||
if (this.options.attachments) {
|
if (this.options.attachments) {
|
||||||
const attachments = await this.options.attachments;
|
const attachments = await this.options.attachments;
|
||||||
|
const latestMessage = orderedMessages[orderedMessages.length - 1];
|
||||||
|
|
||||||
if (this.message_file_map) {
|
if (this.message_file_map) {
|
||||||
this.message_file_map[orderedMessages[orderedMessages.length - 1].messageId] = attachments;
|
this.message_file_map[latestMessage.messageId] = attachments;
|
||||||
} else {
|
} else {
|
||||||
this.message_file_map = {
|
this.message_file_map = {
|
||||||
[orderedMessages[orderedMessages.length - 1].messageId]: attachments,
|
[latestMessage.messageId]: attachments,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
const files = await this.processAttachments(
|
await this.addFileContextToMessage(latestMessage, attachments);
|
||||||
orderedMessages[orderedMessages.length - 1],
|
const files = await this.processAttachments(latestMessage, attachments);
|
||||||
attachments,
|
|
||||||
);
|
|
||||||
|
|
||||||
this.options.attachments = files;
|
this.options.attachments = files;
|
||||||
}
|
}
|
||||||
|
|
@ -280,21 +276,21 @@ class AgentClient extends BaseClient {
|
||||||
assistantName: this.options?.modelLabel,
|
assistantName: this.options?.modelLabel,
|
||||||
});
|
});
|
||||||
|
|
||||||
if (message.ocr && i !== orderedMessages.length - 1) {
|
if (message.fileContext && i !== orderedMessages.length - 1) {
|
||||||
if (typeof formattedMessage.content === 'string') {
|
if (typeof formattedMessage.content === 'string') {
|
||||||
formattedMessage.content = message.ocr + '\n' + formattedMessage.content;
|
formattedMessage.content = message.fileContext + '\n' + formattedMessage.content;
|
||||||
} else {
|
} else {
|
||||||
const textPart = formattedMessage.content.find((part) => part.type === 'text');
|
const textPart = formattedMessage.content.find((part) => part.type === 'text');
|
||||||
textPart
|
textPart
|
||||||
? (textPart.text = message.ocr + '\n' + textPart.text)
|
? (textPart.text = message.fileContext + '\n' + textPart.text)
|
||||||
: formattedMessage.content.unshift({ type: 'text', text: message.ocr });
|
: formattedMessage.content.unshift({ type: 'text', text: message.fileContext });
|
||||||
}
|
}
|
||||||
} else if (message.ocr && i === orderedMessages.length - 1) {
|
} else if (message.fileContext && i === orderedMessages.length - 1) {
|
||||||
systemContent = [systemContent, message.ocr].join('\n');
|
systemContent = [systemContent, message.fileContext].join('\n');
|
||||||
}
|
}
|
||||||
|
|
||||||
const needsTokenCount =
|
const needsTokenCount =
|
||||||
(this.contextStrategy && !orderedMessages[i].tokenCount) || message.ocr;
|
(this.contextStrategy && !orderedMessages[i].tokenCount) || message.fileContext;
|
||||||
|
|
||||||
/* If tokens were never counted, or, is a Vision request and the message has files, count again */
|
/* If tokens were never counted, or, is a Vision request and the message has files, count again */
|
||||||
if (needsTokenCount || (this.isVisionModel && (message.image_urls || message.files))) {
|
if (needsTokenCount || (this.isVisionModel && (message.image_urls || message.files))) {
|
||||||
|
|
|
||||||
|
|
@ -1,16 +1,14 @@
|
||||||
const axios = require('axios');
|
const axios = require('axios');
|
||||||
|
const { logAxiosError } = require('@librechat/api');
|
||||||
const { logger } = require('@librechat/data-schemas');
|
const { logger } = require('@librechat/data-schemas');
|
||||||
const { logAxiosError, processTextWithTokenLimit } = require('@librechat/api');
|
|
||||||
const {
|
const {
|
||||||
FileSources,
|
FileSources,
|
||||||
VisionModes,
|
VisionModes,
|
||||||
ImageDetail,
|
ImageDetail,
|
||||||
ContentTypes,
|
ContentTypes,
|
||||||
EModelEndpoint,
|
EModelEndpoint,
|
||||||
mergeFileConfig,
|
|
||||||
} = require('librechat-data-provider');
|
} = require('librechat-data-provider');
|
||||||
const { getStrategyFunctions } = require('~/server/services/Files/strategies');
|
const { getStrategyFunctions } = require('~/server/services/Files/strategies');
|
||||||
const countTokens = require('~/server/utils/countTokens');
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts a readable stream to a base64 encoded string.
|
* Converts a readable stream to a base64 encoded string.
|
||||||
|
|
@ -88,15 +86,14 @@ const blobStorageSources = new Set([FileSources.azure_blob, FileSources.s3]);
|
||||||
* @param {Array<MongoFile>} files - The array of files to encode and format.
|
* @param {Array<MongoFile>} files - The array of files to encode and format.
|
||||||
* @param {EModelEndpoint} [endpoint] - Optional: The endpoint for the image.
|
* @param {EModelEndpoint} [endpoint] - Optional: The endpoint for the image.
|
||||||
* @param {string} [mode] - Optional: The endpoint mode for the image.
|
* @param {string} [mode] - Optional: The endpoint mode for the image.
|
||||||
* @returns {Promise<{ text: string; files: MongoFile[]; image_urls: MessageContentImageUrl[] }>} - A promise that resolves to the result object containing the encoded images and file details.
|
* @returns {Promise<{ files: MongoFile[]; image_urls: MessageContentImageUrl[] }>} - A promise that resolves to the result object containing the encoded images and file details.
|
||||||
*/
|
*/
|
||||||
async function encodeAndFormat(req, files, endpoint, mode) {
|
async function encodeAndFormat(req, files, endpoint, mode) {
|
||||||
const promises = [];
|
const promises = [];
|
||||||
/** @type {Record<FileSources, Pick<ReturnType<typeof getStrategyFunctions>, 'prepareImagePayload' | 'getDownloadStream'>>} */
|
/** @type {Record<FileSources, Pick<ReturnType<typeof getStrategyFunctions>, 'prepareImagePayload' | 'getDownloadStream'>>} */
|
||||||
const encodingMethods = {};
|
const encodingMethods = {};
|
||||||
/** @type {{ text: string; files: MongoFile[]; image_urls: MessageContentImageUrl[] }} */
|
/** @type {{ files: MongoFile[]; image_urls: MessageContentImageUrl[] }} */
|
||||||
const result = {
|
const result = {
|
||||||
text: '',
|
|
||||||
files: [],
|
files: [],
|
||||||
image_urls: [],
|
image_urls: [],
|
||||||
};
|
};
|
||||||
|
|
@ -105,29 +102,9 @@ async function encodeAndFormat(req, files, endpoint, mode) {
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
const fileTokenLimit =
|
|
||||||
req.body?.fileTokenLimit ?? mergeFileConfig(req.config?.fileConfig).fileTokenLimit;
|
|
||||||
|
|
||||||
for (let file of files) {
|
for (let file of files) {
|
||||||
/** @type {FileSources} */
|
/** @type {FileSources} */
|
||||||
const source = file.source ?? FileSources.local;
|
const source = file.source ?? FileSources.local;
|
||||||
if (source === FileSources.text && file.text) {
|
|
||||||
let fileText = file.text;
|
|
||||||
|
|
||||||
const { text: limitedText, wasTruncated } = await processTextWithTokenLimit({
|
|
||||||
text: fileText,
|
|
||||||
tokenLimit: fileTokenLimit,
|
|
||||||
tokenCountFn: (text) => countTokens(text),
|
|
||||||
});
|
|
||||||
|
|
||||||
if (wasTruncated) {
|
|
||||||
logger.debug(
|
|
||||||
`[encodeAndFormat] Text content truncated for file: ${file.filename} due to token limits`,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
result.text += `${!result.text ? 'Attached document(s):\n```md' : '\n\n---\n\n'}# "${file.filename}"\n${limitedText}\n`;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!file.height) {
|
if (!file.height) {
|
||||||
promises.push([file, null]);
|
promises.push([file, null]);
|
||||||
|
|
@ -165,10 +142,6 @@ async function encodeAndFormat(req, files, endpoint, mode) {
|
||||||
promises.push(preparePayload(req, file));
|
promises.push(preparePayload(req, file));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result.text) {
|
|
||||||
result.text += '\n```';
|
|
||||||
}
|
|
||||||
|
|
||||||
const detail = req.body.imageDetail ?? ImageDetail.auto;
|
const detail = req.body.imageDetail ?? ImageDetail.auto;
|
||||||
|
|
||||||
/** @type {Array<[MongoFile, string]>} */
|
/** @type {Array<[MongoFile, string]>} */
|
||||||
|
|
|
||||||
68
packages/api/src/files/context.ts
Normal file
68
packages/api/src/files/context.ts
Normal file
|
|
@ -0,0 +1,68 @@
|
||||||
|
import { logger } from '@librechat/data-schemas';
|
||||||
|
import { FileSources, mergeFileConfig } from 'librechat-data-provider';
|
||||||
|
import type { fileConfigSchema } from 'librechat-data-provider';
|
||||||
|
import type { IMongoFile } from '@librechat/data-schemas';
|
||||||
|
import type { z } from 'zod';
|
||||||
|
import { processTextWithTokenLimit } from '~/utils/text';
|
||||||
|
|
||||||
|
/**
 * Builds a single "Attached document(s)" text block out of attachments whose text
 * was already extracted (OCR, transcriptions, document text, etc.).
 *
 * Only attachments whose `source` is `FileSources.text` and that carry a non-empty
 * `text` are used. Each text is passed through `processTextWithTokenLimit` with the
 * resolved token limit before it is appended under a `# "<filename>"` heading;
 * consecutive files are separated by a `---` rule.
 *
 * @param params - The parameters object
 * @param params.attachments - Array of file attachments
 * @param params.req - Express-like request object; `body.fileTokenLimit` takes
 *   precedence over the `fileTokenLimit` of the merged `config.fileConfig`
 * @param params.tokenCountFn - Function to count tokens in text
 * @returns The formatted file context text, or `undefined` when there are no
 *   attachments, no token limit could be resolved, or no attachment contributed text
 */
export async function extractFileContext({
  attachments,
  req,
  tokenCountFn,
}: {
  attachments: IMongoFile[];
  req?: {
    body?: { fileTokenLimit?: number };
    config?: { fileConfig?: z.infer<typeof fileConfigSchema> };
  };
  tokenCountFn: (text: string) => number;
}): Promise<string | undefined> {
  if (!attachments || attachments.length === 0) {
    return undefined;
  }

  // The merged config is always computed, even when the request body overrides the limit.
  const { fileTokenLimit: configuredLimit } = mergeFileConfig(req?.config?.fileConfig);
  const tokenLimit = req?.body?.fileTokenLimit ?? configuredLimit;

  // Without a usable limit no text is processed at all.
  if (!tokenLimit) {
    return undefined;
  }

  // NOTE(review): no newline follows the opening ```md fence, so the first file's
  // heading ends up on the fence line itself — confirm this is intended.
  const header = 'Attached document(s):\n```md';
  const separator = '\n\n---\n\n';
  const footer = '\n```';

  const sections: string[] = [];

  // Files are handled one at a time, in attachment order.
  for (const file of attachments) {
    const source = file.source ?? FileSources.local;
    if (source !== FileSources.text || !file.text) {
      continue;
    }

    const processed = await processTextWithTokenLimit({
      text: file.text,
      tokenLimit,
      tokenCountFn,
    });

    if (processed.wasTruncated) {
      logger.debug(
        `[extractFileContext] Text content truncated for file: ${file.filename} due to token limits`,
      );
    }

    sections.push(`# "${file.filename}"\n${processed.text}\n`);
  }

  if (sections.length === 0) {
    return undefined;
  }

  return header + sections.join(separator) + footer;
}
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
export * from './audio';
|
export * from './audio';
|
||||||
|
export * from './context';
|
||||||
export * from './encode';
|
export * from './encode';
|
||||||
export * from './mistral/crud';
|
export * from './mistral/crud';
|
||||||
export * from './ocr';
|
export * from './ocr';
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue