🖼️ fix: Extract File Context & Persist Attachments (#10069)

- problem: `addImageUrls` had a side effect that was previously relied upon to populate both the message's `ocr` field (now `fileContext`) and `client.options.attachments`, which recorded the user's uploaded attachments on the user message when it was saved to the database and returned at the end of the request lifecycle
- solution: created dedicated handling for file context, and made sure to populate `allFiles` with non-provider attachments
This commit is contained in:
Danny Avila 2025-10-10 12:35:37 +03:00 committed by GitHub
parent fbe341a171
commit 07d0abc9fd
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 128 additions and 50 deletions

View file

@ -3,6 +3,7 @@ const fetch = require('node-fetch');
const { logger } = require('@librechat/data-schemas'); const { logger } = require('@librechat/data-schemas');
const { const {
getBalanceConfig, getBalanceConfig,
extractFileContext,
encodeAndFormatAudios, encodeAndFormatAudios,
encodeAndFormatVideos, encodeAndFormatVideos,
encodeAndFormatDocuments, encodeAndFormatDocuments,
@ -10,6 +11,7 @@ const {
const { const {
Constants, Constants,
ErrorTypes, ErrorTypes,
FileSources,
ContentTypes, ContentTypes,
excludedKeys, excludedKeys,
EModelEndpoint, EModelEndpoint,
@ -21,6 +23,7 @@ const { getMessages, saveMessage, updateMessage, saveConvo, getConvo } = require
const { getStrategyFunctions } = require('~/server/services/Files/strategies'); const { getStrategyFunctions } = require('~/server/services/Files/strategies');
const { checkBalance } = require('~/models/balanceMethods'); const { checkBalance } = require('~/models/balanceMethods');
const { truncateToolCallOutputs } = require('./prompts'); const { truncateToolCallOutputs } = require('./prompts');
const countTokens = require('~/server/utils/countTokens');
const { getFiles } = require('~/models/File'); const { getFiles } = require('~/models/File');
const TextStream = require('./TextStream'); const TextStream = require('./TextStream');
@ -1245,27 +1248,62 @@ class BaseClient {
return audioResult.files; return audioResult.files;
} }
/**
* Extracts text context from attachments and sets it on the message.
* This handles text that was already extracted from files (OCR, transcriptions, document text, etc.)
* @param {TMessage} message - The message to add context to
* @param {MongoFile[]} attachments - Array of file attachments
* @returns {Promise<void>}
*/
async addFileContextToMessage(message, attachments) {
const fileContext = await extractFileContext({
attachments,
req: this.options?.req,
tokenCountFn: (text) => countTokens(text),
});
if (fileContext) {
message.fileContext = fileContext;
}
}
async processAttachments(message, attachments) { async processAttachments(message, attachments) {
const categorizedAttachments = { const categorizedAttachments = {
images: [], images: [],
documents: [],
videos: [], videos: [],
audios: [], audios: [],
documents: [],
}; };
const allFiles = [];
for (const file of attachments) { for (const file of attachments) {
/** @type {FileSources} */
const source = file.source ?? FileSources.local;
if (source === FileSources.text) {
allFiles.push(file);
continue;
}
if (file.embedded === true || file.metadata?.fileIdentifier != null) {
allFiles.push(file);
continue;
}
if (file.type.startsWith('image/')) { if (file.type.startsWith('image/')) {
categorizedAttachments.images.push(file); categorizedAttachments.images.push(file);
} else if (file.type === 'application/pdf') { } else if (file.type === 'application/pdf') {
categorizedAttachments.documents.push(file); categorizedAttachments.documents.push(file);
allFiles.push(file);
} else if (file.type.startsWith('video/')) { } else if (file.type.startsWith('video/')) {
categorizedAttachments.videos.push(file); categorizedAttachments.videos.push(file);
allFiles.push(file);
} else if (file.type.startsWith('audio/')) { } else if (file.type.startsWith('audio/')) {
categorizedAttachments.audios.push(file); categorizedAttachments.audios.push(file);
allFiles.push(file);
} }
} }
const [imageFiles, documentFiles, videoFiles, audioFiles] = await Promise.all([ const [imageFiles] = await Promise.all([
categorizedAttachments.images.length > 0 categorizedAttachments.images.length > 0
? this.addImageURLs(message, categorizedAttachments.images) ? this.addImageURLs(message, categorizedAttachments.images)
: Promise.resolve([]), : Promise.resolve([]),
@ -1280,7 +1318,8 @@ class BaseClient {
: Promise.resolve([]), : Promise.resolve([]),
]); ]);
const allFiles = [...imageFiles, ...documentFiles, ...videoFiles, ...audioFiles]; allFiles.push(...imageFiles);
const seenFileIds = new Set(); const seenFileIds = new Set();
const uniqueFiles = []; const uniqueFiles = [];
@ -1345,6 +1384,7 @@ class BaseClient {
{}, {},
); );
await this.addFileContextToMessage(message, files);
await this.processAttachments(message, files); await this.processAttachments(message, files);
this.message_file_map[message.messageId] = files; this.message_file_map[message.messageId] = files;

View file

@ -211,16 +211,13 @@ class AgentClient extends BaseClient {
* @returns {Promise<Array<Partial<MongoFile>>>} * @returns {Promise<Array<Partial<MongoFile>>>}
*/ */
async addImageURLs(message, attachments) { async addImageURLs(message, attachments) {
const { files, text, image_urls } = await encodeAndFormat( const { files, image_urls } = await encodeAndFormat(
this.options.req, this.options.req,
attachments, attachments,
this.options.agent.provider, this.options.agent.provider,
VisionModes.agents, VisionModes.agents,
); );
message.image_urls = image_urls.length ? image_urls : undefined; message.image_urls = image_urls.length ? image_urls : undefined;
if (text && text.length) {
message.ocr = text;
}
return files; return files;
} }
@ -248,19 +245,18 @@ class AgentClient extends BaseClient {
if (this.options.attachments) { if (this.options.attachments) {
const attachments = await this.options.attachments; const attachments = await this.options.attachments;
const latestMessage = orderedMessages[orderedMessages.length - 1];
if (this.message_file_map) { if (this.message_file_map) {
this.message_file_map[orderedMessages[orderedMessages.length - 1].messageId] = attachments; this.message_file_map[latestMessage.messageId] = attachments;
} else { } else {
this.message_file_map = { this.message_file_map = {
[orderedMessages[orderedMessages.length - 1].messageId]: attachments, [latestMessage.messageId]: attachments,
}; };
} }
const files = await this.processAttachments( await this.addFileContextToMessage(latestMessage, attachments);
orderedMessages[orderedMessages.length - 1], const files = await this.processAttachments(latestMessage, attachments);
attachments,
);
this.options.attachments = files; this.options.attachments = files;
} }
@ -280,21 +276,21 @@ class AgentClient extends BaseClient {
assistantName: this.options?.modelLabel, assistantName: this.options?.modelLabel,
}); });
if (message.ocr && i !== orderedMessages.length - 1) { if (message.fileContext && i !== orderedMessages.length - 1) {
if (typeof formattedMessage.content === 'string') { if (typeof formattedMessage.content === 'string') {
formattedMessage.content = message.ocr + '\n' + formattedMessage.content; formattedMessage.content = message.fileContext + '\n' + formattedMessage.content;
} else { } else {
const textPart = formattedMessage.content.find((part) => part.type === 'text'); const textPart = formattedMessage.content.find((part) => part.type === 'text');
textPart textPart
? (textPart.text = message.ocr + '\n' + textPart.text) ? (textPart.text = message.fileContext + '\n' + textPart.text)
: formattedMessage.content.unshift({ type: 'text', text: message.ocr }); : formattedMessage.content.unshift({ type: 'text', text: message.fileContext });
} }
} else if (message.ocr && i === orderedMessages.length - 1) { } else if (message.fileContext && i === orderedMessages.length - 1) {
systemContent = [systemContent, message.ocr].join('\n'); systemContent = [systemContent, message.fileContext].join('\n');
} }
const needsTokenCount = const needsTokenCount =
(this.contextStrategy && !orderedMessages[i].tokenCount) || message.ocr; (this.contextStrategy && !orderedMessages[i].tokenCount) || message.fileContext;
/* If tokens were never counted, or, is a Vision request and the message has files, count again */ /* If tokens were never counted, or, is a Vision request and the message has files, count again */
if (needsTokenCount || (this.isVisionModel && (message.image_urls || message.files))) { if (needsTokenCount || (this.isVisionModel && (message.image_urls || message.files))) {

View file

@ -1,16 +1,14 @@
const axios = require('axios'); const axios = require('axios');
const { logAxiosError } = require('@librechat/api');
const { logger } = require('@librechat/data-schemas'); const { logger } = require('@librechat/data-schemas');
const { logAxiosError, processTextWithTokenLimit } = require('@librechat/api');
const { const {
FileSources, FileSources,
VisionModes, VisionModes,
ImageDetail, ImageDetail,
ContentTypes, ContentTypes,
EModelEndpoint, EModelEndpoint,
mergeFileConfig,
} = require('librechat-data-provider'); } = require('librechat-data-provider');
const { getStrategyFunctions } = require('~/server/services/Files/strategies'); const { getStrategyFunctions } = require('~/server/services/Files/strategies');
const countTokens = require('~/server/utils/countTokens');
/** /**
* Converts a readable stream to a base64 encoded string. * Converts a readable stream to a base64 encoded string.
@ -88,15 +86,14 @@ const blobStorageSources = new Set([FileSources.azure_blob, FileSources.s3]);
* @param {Array<MongoFile>} files - The array of files to encode and format. * @param {Array<MongoFile>} files - The array of files to encode and format.
* @param {EModelEndpoint} [endpoint] - Optional: The endpoint for the image. * @param {EModelEndpoint} [endpoint] - Optional: The endpoint for the image.
* @param {string} [mode] - Optional: The endpoint mode for the image. * @param {string} [mode] - Optional: The endpoint mode for the image.
* @returns {Promise<{ text: string; files: MongoFile[]; image_urls: MessageContentImageUrl[] }>} - A promise that resolves to the result object containing the encoded images and file details. * @returns {Promise<{ files: MongoFile[]; image_urls: MessageContentImageUrl[] }>} - A promise that resolves to the result object containing the encoded images and file details.
*/ */
async function encodeAndFormat(req, files, endpoint, mode) { async function encodeAndFormat(req, files, endpoint, mode) {
const promises = []; const promises = [];
/** @type {Record<FileSources, Pick<ReturnType<typeof getStrategyFunctions>, 'prepareImagePayload' | 'getDownloadStream'>>} */ /** @type {Record<FileSources, Pick<ReturnType<typeof getStrategyFunctions>, 'prepareImagePayload' | 'getDownloadStream'>>} */
const encodingMethods = {}; const encodingMethods = {};
/** @type {{ text: string; files: MongoFile[]; image_urls: MessageContentImageUrl[] }} */ /** @type {{ files: MongoFile[]; image_urls: MessageContentImageUrl[] }} */
const result = { const result = {
text: '',
files: [], files: [],
image_urls: [], image_urls: [],
}; };
@ -105,29 +102,9 @@ async function encodeAndFormat(req, files, endpoint, mode) {
return result; return result;
} }
const fileTokenLimit =
req.body?.fileTokenLimit ?? mergeFileConfig(req.config?.fileConfig).fileTokenLimit;
for (let file of files) { for (let file of files) {
/** @type {FileSources} */ /** @type {FileSources} */
const source = file.source ?? FileSources.local; const source = file.source ?? FileSources.local;
if (source === FileSources.text && file.text) {
let fileText = file.text;
const { text: limitedText, wasTruncated } = await processTextWithTokenLimit({
text: fileText,
tokenLimit: fileTokenLimit,
tokenCountFn: (text) => countTokens(text),
});
if (wasTruncated) {
logger.debug(
`[encodeAndFormat] Text content truncated for file: ${file.filename} due to token limits`,
);
}
result.text += `${!result.text ? 'Attached document(s):\n```md' : '\n\n---\n\n'}# "${file.filename}"\n${limitedText}\n`;
}
if (!file.height) { if (!file.height) {
promises.push([file, null]); promises.push([file, null]);
@ -165,10 +142,6 @@ async function encodeAndFormat(req, files, endpoint, mode) {
promises.push(preparePayload(req, file)); promises.push(preparePayload(req, file));
} }
if (result.text) {
result.text += '\n```';
}
const detail = req.body.imageDetail ?? ImageDetail.auto; const detail = req.body.imageDetail ?? ImageDetail.auto;
/** @type {Array<[MongoFile, string]>} */ /** @type {Array<[MongoFile, string]>} */

View file

@ -0,0 +1,68 @@
import { logger } from '@librechat/data-schemas';
import { FileSources, mergeFileConfig } from 'librechat-data-provider';
import type { fileConfigSchema } from 'librechat-data-provider';
import type { IMongoFile } from '@librechat/data-schemas';
import type { z } from 'zod';
import { processTextWithTokenLimit } from '~/utils/text';
/**
* Extracts text context from attachments and returns formatted text.
* This handles text that was already extracted from files (OCR, transcriptions, document text, etc.)
* @param params - The parameters object
* @param params.attachments - Array of file attachments
* @param params.req - Express request object for config access
* @param params.tokenCountFn - Function to count tokens in text
* @returns The formatted file context text, or undefined if no text found
*/
export async function extractFileContext({
attachments,
req,
tokenCountFn,
}: {
attachments: IMongoFile[];
req?: {
body?: { fileTokenLimit?: number };
config?: { fileConfig?: z.infer<typeof fileConfigSchema> };
};
tokenCountFn: (text: string) => number;
}): Promise<string | undefined> {
if (!attachments || attachments.length === 0) {
return undefined;
}
const fileConfig = mergeFileConfig(req?.config?.fileConfig);
const fileTokenLimit = req?.body?.fileTokenLimit ?? fileConfig.fileTokenLimit;
if (!fileTokenLimit) {
// If no token limit, return undefined (no processing)
return undefined;
}
let resultText = '';
for (const file of attachments) {
const source = file.source ?? FileSources.local;
if (source === FileSources.text && file.text) {
const { text: limitedText, wasTruncated } = await processTextWithTokenLimit({
text: file.text,
tokenLimit: fileTokenLimit,
tokenCountFn,
});
if (wasTruncated) {
logger.debug(
`[extractFileContext] Text content truncated for file: ${file.filename} due to token limits`,
);
}
resultText += `${!resultText ? 'Attached document(s):\n```md' : '\n\n---\n\n'}# "${file.filename}"\n${limitedText}\n`;
}
}
if (resultText) {
resultText += '\n```';
return resultText;
}
return undefined;
}

View file

@ -1,4 +1,5 @@
export * from './audio'; export * from './audio';
export * from './context';
export * from './encode'; export * from './encode';
export * from './mistral/crud'; export * from './mistral/crud';
export * from './ocr'; export * from './ocr';