mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-17 08:50:15 +01:00
🖼️ fix: Extract File Context & Persist Attachments (#10069)
- problem: `addImageUrls` had a side effect that was previously relied upon to populate both the `ocr` message field (now `fileContext`) and `client.options.attachments`; the latter records the user's uploaded attachments on the user message when it is saved to the database and returned at the end of the request lifecycle. - solution: created dedicated handling for file context, and made sure to populate `allFiles` with non-provider attachments.
This commit is contained in:
parent
fbe341a171
commit
07d0abc9fd
5 changed files with 128 additions and 50 deletions
68
packages/api/src/files/context.ts
Normal file
68
packages/api/src/files/context.ts
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
import { logger } from '@librechat/data-schemas';
|
||||
import { FileSources, mergeFileConfig } from 'librechat-data-provider';
|
||||
import type { fileConfigSchema } from 'librechat-data-provider';
|
||||
import type { IMongoFile } from '@librechat/data-schemas';
|
||||
import type { z } from 'zod';
|
||||
import { processTextWithTokenLimit } from '~/utils/text';
|
||||
|
||||
/**
 * Builds the "file context" text block from attachments whose text was already
 * extracted (OCR, transcriptions, document text, etc.).
 *
 * Only attachments whose `source` is `FileSources.text` and that carry a non-empty
 * `text` contribute. Each one is passed through `processTextWithTokenLimit` with the
 * resolved token limit, and the results are emitted inside a single fenced `md` block,
 * separated by `---` rules.
 *
 * @param params - The parameters object
 * @param params.attachments - Array of file attachments
 * @param params.req - Optional request-like object; `body.fileTokenLimit` takes
 *   precedence over the `fileTokenLimit` of the merged file config
 * @param params.tokenCountFn - Function to count tokens in text
 * @returns The formatted file context text, or undefined when there are no
 *   attachments, no token limit is resolved, or no attachment yields text
 */
export async function extractFileContext({
  attachments,
  req,
  tokenCountFn,
}: {
  attachments: IMongoFile[];
  req?: {
    body?: { fileTokenLimit?: number };
    config?: { fileConfig?: z.infer<typeof fileConfigSchema> };
  };
  tokenCountFn: (text: string) => number;
}): Promise<string | undefined> {
  if (!attachments || attachments.length === 0) {
    return undefined;
  }

  const fileConfig = mergeFileConfig(req?.config?.fileConfig);
  const fileTokenLimit = req?.body?.fileTokenLimit ?? fileConfig.fileTokenLimit;

  if (!fileTokenLimit) {
    // If no token limit, return undefined (no processing)
    return undefined;
  }

  // One entry per contributing file; joined once at the end so separators and the
  // fence are written in a single place.
  const sections: string[] = [];

  // Files are processed one at a time, in attachment order, so truncation logs and
  // output order follow the order in which the files were attached.
  for (const file of attachments) {
    const source = file.source ?? FileSources.local;
    if (source !== FileSources.text || !file.text) {
      continue;
    }

    const { text: limitedText, wasTruncated } = await processTextWithTokenLimit({
      text: file.text,
      tokenLimit: fileTokenLimit,
      tokenCountFn,
    });

    if (wasTruncated) {
      logger.debug(
        `[extractFileContext] Text content truncated for file: ${file.filename} due to token limits`,
      );
    }

    sections.push(`# "${file.filename}"\n${limitedText}\n`);
  }

  if (sections.length === 0) {
    return undefined;
  }

  // The newline after the `md` info string is required: without it the first file
  // header is glued onto the fence line and becomes part of the info string instead
  // of the fenced content.
  return `Attached document(s):\n\`\`\`md\n${sections.join('\n\n---\n\n')}\n\`\`\``;
}
|
||||
|
|
@ -1,4 +1,5 @@
|
|||
export * from './audio';
|
||||
export * from './context';
|
||||
export * from './encode';
|
||||
export * from './mistral/crud';
|
||||
export * from './ocr';
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue