mirror of
https://github.com/danny-avila/LibreChat.git
synced 2025-12-16 08:20:14 +01:00
🔧 fix: Improve Assistants File Citation & Download Handling (#2248)
* fix(processMessages): properly handle assistant file citations and add sources list
* feat: improve file download UX by making any downloaded files accessible within the app post-download
* refactor(processOpenAIImageOutput): correctly handle two different outputs for images since OpenAI generates a file in their storage, shares filepath for image rendering
* refactor: create `addFileToCache` helper to use across frontend
* refactor: add ImageFile parts to cache on processing content stream
This commit is contained in:
parent
bc2a628902
commit
6a6b2e79b0
11 changed files with 142 additions and 57 deletions
|
|
@ -104,6 +104,7 @@ router.get('/download/:userId/:filepath', async (req, res) => {
|
|||
const setHeaders = () => {
|
||||
res.setHeader('Content-Disposition', `attachment; filename="${file.filename}"`);
|
||||
res.setHeader('Content-Type', 'application/octet-stream');
|
||||
res.setHeader('X-File-Metadata', JSON.stringify(file));
|
||||
};
|
||||
|
||||
/** @type {{ body: import('stream').PassThrough } | undefined} */
|
||||
|
|
|
|||
|
|
@ -1,4 +1,3 @@
|
|||
const path = require('path');
|
||||
const { klona } = require('klona');
|
||||
const {
|
||||
StepTypes,
|
||||
|
|
@ -233,14 +232,9 @@ function createInProgressHandler(openai, thread_id, messages) {
|
|||
file_id,
|
||||
basename: `${file_id}.png`,
|
||||
});
|
||||
// toolCall.asset_pointer = file.filepath;
|
||||
const prelimImage = {
|
||||
file_id,
|
||||
filename: path.basename(file.filepath),
|
||||
filepath: file.filepath,
|
||||
height: file.height,
|
||||
width: file.width,
|
||||
};
|
||||
|
||||
const prelimImage = file;
|
||||
|
||||
// check if every key has a value before adding to content
|
||||
const prelimImageKeys = Object.keys(prelimImage);
|
||||
const validImageFile = prelimImageKeys.every((key) => prelimImage[key]);
|
||||
|
|
|
|||
|
|
@ -9,6 +9,8 @@ const {
|
|||
imageExtRegex,
|
||||
EModelEndpoint,
|
||||
mergeFileConfig,
|
||||
hostImageIdSuffix,
|
||||
hostImageNamePrefix,
|
||||
} = require('librechat-data-provider');
|
||||
const { convertToWebP, resizeAndConvert } = require('~/server/services/Files/images');
|
||||
const { initializeClient } = require('~/server/services/Endpoints/assistants');
|
||||
|
|
@ -309,7 +311,7 @@ const processFileUpload = async ({ req, res, file, metadata }) => {
|
|||
* @param {OpenAI} params.openai - The OpenAI client instance.
|
||||
* @param {string} params.file_id - The ID of the file to retrieve.
|
||||
* @param {string} params.userId - The user ID.
|
||||
* @param {string} params.filename - The name of the file.
|
||||
* @param {string} [params.filename] - The name of the file. `undefined` for `file_citation` annotations.
|
||||
* @param {boolean} [params.saveFile=false] - Whether to save the file metadata to the database.
|
||||
* @param {boolean} [params.updateUsage=false] - Whether to update file usage in database.
|
||||
*/
|
||||
|
|
@ -322,18 +324,23 @@ const processOpenAIFile = async ({
|
|||
updateUsage = false,
|
||||
}) => {
|
||||
const _file = await openai.files.retrieve(file_id);
|
||||
const filepath = `${openai.baseURL}/files/${userId}/${file_id}/${filename}`;
|
||||
const originalName = filename ?? (_file.filename ? path.basename(_file.filename) : undefined);
|
||||
const filepath = `${openai.baseURL}/files/${userId}/${file_id}${
|
||||
originalName ? `/${originalName}` : ''
|
||||
}`;
|
||||
const type = mime.getType(originalName ?? file_id);
|
||||
|
||||
const file = {
|
||||
..._file,
|
||||
type,
|
||||
file_id,
|
||||
filepath,
|
||||
usage: 1,
|
||||
filename,
|
||||
user: userId,
|
||||
context: _file.purpose,
|
||||
source: FileSources.openai,
|
||||
model: openai.req.body.model,
|
||||
type: mime.getType(filename),
|
||||
context: FileContext.assistants_output,
|
||||
filename: originalName ?? file_id,
|
||||
};
|
||||
|
||||
if (saveFile) {
|
||||
|
|
@ -360,18 +367,32 @@ const processOpenAIFile = async ({
|
|||
* @returns {Promise<MongoFile>} The file metadata.
|
||||
*/
|
||||
const processOpenAIImageOutput = async ({ req, buffer, file_id, filename, fileExt }) => {
  // A single timestamp is shared by `createdAt` and `updatedAt` on both records.
  const formattedDate = new Date().toISOString();
  const _file = await convertToWebP(req, buffer, 'high', `${file_id}${fileExt}`);

  // Record for the converted image: stored under the app's configured file strategy,
  // with a suffixed id and prefixed name so it does not collide with the OpenAI record below.
  const file = {
    ..._file,
    usage: 1,
    user: req.user.id,
    type: 'image/webp',
    createdAt: formattedDate,
    updatedAt: formattedDate,
    source: req.app.locals.fileStrategy,
    context: FileContext.assistants_output,
    file_id: `${file_id}${hostImageIdSuffix}`,
    filename: `${hostImageNamePrefix}${filename}`,
  };

  // NOTE(review): neither `createFile` call is awaited (same as the original), so the
  // function returns without waiting for them — confirm this is intentional.
  createFile(file, true);

  // Second record keeps the original OpenAI id/name and the mime type of the original extension.
  createFile(
    {
      ...file,
      file_id,
      filename,
      source: FileSources.openai,
      type: mime.getType(fileExt),
    },
    true,
  );

  return file;
};
|
||||
|
||||
|
|
@ -382,7 +403,7 @@ const processOpenAIImageOutput = async ({ req, buffer, file_id, filename, fileEx
|
|||
* @param {OpenAIClient} params.openai - The OpenAI client instance.
|
||||
* @param {RunClient} params.client - The LibreChat client instance: either refers to `openai` or `streamRunManager`.
|
||||
* @param {string} params.file_id - The ID of the file to retrieve.
|
||||
* @param {string} params.basename - The basename of the file (if image); e.g., 'image.jpg'.
|
||||
* @param {string} [params.basename] - The basename of the file (if image); e.g., 'image.jpg'. `undefined` for `file_citation` annotations.
|
||||
* @param {boolean} [params.unknownType] - Whether the file type is unknown.
|
||||
* @returns {Promise<{file_id: string, filepath: string, source: string, bytes?: number, width?: number, height?: number} | null>}
|
||||
* - Returns null if `file_id` is not defined; else, the file metadata if successfully retrieved and processed.
|
||||
|
|
@ -398,14 +419,19 @@ async function retrieveAndProcessFile({
|
|||
return null;
|
||||
}
|
||||
|
||||
let basename = _basename;
|
||||
const processArgs = { openai, file_id, filename: basename, userId: client.req.user.id };
|
||||
|
||||
// If no basename provided, return only the file metadata
|
||||
if (!basename) {
|
||||
return await processOpenAIFile({ ...processArgs, saveFile: true });
|
||||
}
|
||||
|
||||
const fileExt = path.extname(basename);
|
||||
if (client.attachedFileIds?.has(file_id) || client.processedFileIds?.has(file_id)) {
|
||||
return processOpenAIFile({ ...processArgs, updateUsage: true });
|
||||
}
|
||||
|
||||
let basename = _basename;
|
||||
const fileExt = path.extname(basename);
|
||||
const processArgs = { openai, file_id, filename: basename, userId: client.req.user.id };
|
||||
|
||||
/**
|
||||
* @returns {Promise<Buffer>} The file data buffer.
|
||||
*/
|
||||
|
|
@ -415,11 +441,6 @@ async function retrieveAndProcessFile({
|
|||
return Buffer.from(arrayBuffer);
|
||||
};
|
||||
|
||||
// If no basename provided, return only the file metadata
|
||||
if (!basename) {
|
||||
return await processOpenAIFile({ ...processArgs, saveFile: true });
|
||||
}
|
||||
|
||||
let dataBuffer;
|
||||
if (unknownType || !fileExt || imageExtRegex.test(basename)) {
|
||||
try {
|
||||
|
|
|
|||
|
|
@ -1,4 +1,3 @@
|
|||
const path = require('path');
|
||||
const {
|
||||
StepTypes,
|
||||
ContentTypes,
|
||||
|
|
@ -222,14 +221,9 @@ class StreamRunManager {
|
|||
file_id,
|
||||
basename: `${file_id}.png`,
|
||||
});
|
||||
// toolCall.asset_pointer = file.filepath;
|
||||
const prelimImage = {
|
||||
file_id,
|
||||
filename: path.basename(file.filepath),
|
||||
filepath: file.filepath,
|
||||
height: file.height,
|
||||
width: file.width,
|
||||
};
|
||||
|
||||
const prelimImage = file;
|
||||
|
||||
// check if every key has a value before adding to content
|
||||
const prelimImageKeys = Object.keys(prelimImage);
|
||||
const validImageFile = prelimImageKeys.every((key) => prelimImage[key]);
|
||||
|
|
|
|||
|
|
@ -549,6 +549,7 @@ async function processMessages({ openai, client, messages = [] }) {
|
|||
|
||||
let text = '';
|
||||
let edited = false;
|
||||
const sources = [];
|
||||
for (const message of sorted) {
|
||||
message.files = [];
|
||||
for (const content of message.content) {
|
||||
|
|
@ -588,6 +589,17 @@ async function processMessages({ openai, client, messages = [] }) {
|
|||
const file_id = annotationType?.file_id;
|
||||
const alreadyProcessed = client.processedFileIds.has(file_id);
|
||||
|
||||
const replaceCurrentAnnotation = (replacement = '') => {
|
||||
currentText = replaceAnnotation(
|
||||
currentText,
|
||||
annotation.start_index,
|
||||
annotation.end_index,
|
||||
annotation.text,
|
||||
replacement,
|
||||
);
|
||||
edited = true;
|
||||
};
|
||||
|
||||
if (alreadyProcessed) {
|
||||
const { file_id } = annotationType || {};
|
||||
file = await retrieveAndProcessFile({ openai, client, file_id, unknownType: true });
|
||||
|
|
@ -599,6 +611,7 @@ async function processMessages({ openai, client, messages = [] }) {
|
|||
file_id,
|
||||
basename,
|
||||
});
|
||||
replaceCurrentAnnotation(file.filepath);
|
||||
} else if (type === AnnotationTypes.FILE_CITATION) {
|
||||
file = await retrieveAndProcessFile({
|
||||
openai,
|
||||
|
|
@ -606,17 +619,8 @@ async function processMessages({ openai, client, messages = [] }) {
|
|||
file_id,
|
||||
unknownType: true,
|
||||
});
|
||||
}
|
||||
|
||||
if (file.filepath) {
|
||||
currentText = replaceAnnotation(
|
||||
currentText,
|
||||
annotation.start_index,
|
||||
annotation.end_index,
|
||||
annotation.text,
|
||||
file.filepath,
|
||||
);
|
||||
edited = true;
|
||||
sources.push(file.filename);
|
||||
replaceCurrentAnnotation(`^${sources.length}^`);
|
||||
}
|
||||
|
||||
text += currentText + ' ';
|
||||
|
|
@ -631,6 +635,13 @@ async function processMessages({ openai, client, messages = [] }) {
|
|||
}
|
||||
}
|
||||
|
||||
if (sources.length) {
|
||||
text += '\n\n';
|
||||
for (let i = 0; i < sources.length; i++) {
|
||||
text += `^${i + 1}.^ ${sources[i]}${i === sources.length - 1 ? '' : '\n'}`;
|
||||
}
|
||||
}
|
||||
|
||||
return { messages: sorted, text, edited };
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ import type {
|
|||
TEndpointsConfig,
|
||||
TCheckUserKeyResponse,
|
||||
} from 'librechat-data-provider';
|
||||
import { findPageForConversation } from '~/utils';
|
||||
import { findPageForConversation, addFileToCache } from '~/utils';
|
||||
|
||||
export const useGetFiles = <TData = TFile[] | boolean>(
|
||||
config?: UseQueryOptions<TFile[], unknown, TData>,
|
||||
|
|
@ -326,15 +326,29 @@ export const useGetAssistantDocsQuery = (
|
|||
};
|
||||
|
||||
export const useFileDownload = (userId: string, filepath: string): QueryObserverResult<string> => {
|
||||
const queryClient = useQueryClient();
|
||||
return useQuery(
|
||||
[QueryKeys.fileDownload, filepath],
|
||||
async () => {
|
||||
if (!userId) {
|
||||
console.warn('No user ID provided for file download');
|
||||
}
|
||||
const blob = await dataService.getFileDownload(userId, filepath);
|
||||
const downloadUrl = window.URL.createObjectURL(blob);
|
||||
return downloadUrl;
|
||||
const response = await dataService.getFileDownload(userId, filepath);
|
||||
const blob = response.data;
|
||||
const downloadURL = window.URL.createObjectURL(blob);
|
||||
try {
|
||||
const metadata: TFile | undefined = JSON.parse(response.headers['x-file-metadata']);
|
||||
if (!metadata) {
|
||||
console.warn('No metadata found for file download', response.headers);
|
||||
return downloadURL;
|
||||
}
|
||||
|
||||
addFileToCache(queryClient, metadata);
|
||||
} catch (e) {
|
||||
console.error('Error parsing file metadata, skipped updating file query cache', e);
|
||||
}
|
||||
|
||||
return downloadURL;
|
||||
},
|
||||
{
|
||||
enabled: false,
|
||||
|
|
|
|||
|
|
@ -1,13 +1,18 @@
|
|||
import { useCallback, useMemo } from 'react';
|
||||
import { ContentTypes } from 'librechat-data-provider';
|
||||
import { useQueryClient } from '@tanstack/react-query';
|
||||
|
||||
import type {
|
||||
Text,
|
||||
TMessage,
|
||||
ImageFile,
|
||||
TSubmission,
|
||||
ContentPart,
|
||||
PartMetadata,
|
||||
TContentData,
|
||||
TMessageContentParts,
|
||||
} from 'librechat-data-provider';
|
||||
import { useCallback, useMemo } from 'react';
|
||||
import { addFileToCache } from '~/utils';
|
||||
|
||||
type TUseContentHandler = {
|
||||
setMessages: (messages: TMessage[]) => void;
|
||||
|
|
@ -20,6 +25,7 @@ type TContentHandler = {
|
|||
};
|
||||
|
||||
export default function useContentHandler({ setMessages, getMessages }: TUseContentHandler) {
|
||||
const queryClient = useQueryClient();
|
||||
const messageMap = useMemo(() => new Map<string, TMessage>(), []);
|
||||
return useCallback(
|
||||
({ data, submission }: TContentHandler) => {
|
||||
|
|
@ -47,10 +53,14 @@ export default function useContentHandler({ setMessages, getMessages }: TUseCont
|
|||
}
|
||||
|
||||
// TODO: handle streaming for non-text
|
||||
const textPart: Text | string = data[ContentTypes.TEXT];
|
||||
const textPart: Text | string | undefined = data[ContentTypes.TEXT];
|
||||
const part: ContentPart =
|
||||
textPart && typeof textPart === 'string' ? { value: textPart } : data[type];
|
||||
|
||||
if (type === ContentTypes.IMAGE_FILE) {
|
||||
addFileToCache(queryClient, part as ImageFile & PartMetadata);
|
||||
}
|
||||
|
||||
/* spreading the content array to avoid mutation */
|
||||
response.content = [...(response.content ?? [])];
|
||||
|
||||
|
|
@ -68,6 +78,6 @@ export default function useContentHandler({ setMessages, getMessages }: TUseCont
|
|||
|
||||
setMessages([...messages, response]);
|
||||
},
|
||||
[getMessages, messageMap, setMessages],
|
||||
[queryClient, getMessages, messageMap, setMessages],
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,6 @@
|
|||
import { excelMimeTypes } from 'librechat-data-provider';
|
||||
import { excelMimeTypes, QueryKeys } from 'librechat-data-provider';
|
||||
import type { QueryClient } from '@tanstack/react-query';
|
||||
import type { TFile } from 'librechat-data-provider';
|
||||
import SheetPaths from '~/components/svg/Files/SheetPaths';
|
||||
import TextPaths from '~/components/svg/Files/TextPaths';
|
||||
import FilePaths from '~/components/svg/Files/FilePaths';
|
||||
|
|
@ -128,3 +130,32 @@ export function formatDate(dateString) {
|
|||
|
||||
return `${day} ${month} ${year}`;
|
||||
}
|
||||
|
||||
/**
 * Prepends `newfile` to the file list cached under `[QueryKeys.files]`.
 *
 * The cache is left untouched (and a warning is logged) when:
 * - no file list is cached yet, or
 * - a cached file already has the same `file_id`.
 *
 * The cached array is never mutated; a new array holding a shallow copy of
 * `newfile` followed by the existing entries is written instead.
 */
export function addFileToCache(queryClient: QueryClient, newfile: TFile) {
  const cachedFiles = queryClient.getQueryData<TFile[]>([QueryKeys.files]);

  if (!cachedFiles) {
    console.warn('No current files found in cache, skipped updating file query cache');
    return;
  }

  const alreadyCached = cachedFiles.some((file) => file.file_id === newfile.file_id);
  if (alreadyCached) {
    console.warn('File already exists in cache, skipped updating file query cache');
    return;
  }

  queryClient.setQueryData<TFile[]>([QueryKeys.files], [{ ...newfile }, ...cachedFiles]);
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import * as t from './types';
|
|||
import * as s from './schemas';
|
||||
import request from './request';
|
||||
import * as endpoints from './api-endpoints';
|
||||
import type { AxiosResponse } from 'axios';
|
||||
|
||||
export function abortRequestWithMessage(
|
||||
endpoint: string,
|
||||
|
|
@ -201,9 +202,9 @@ export const uploadAssistantAvatar = (data: m.AssistantAvatarVariables): Promise
|
|||
);
|
||||
};
|
||||
|
||||
/**
 * Requests a file download and resolves with the full response from
 * `request.getResponse` rather than only its body, so callers can read the
 * response headers as well as the blob in `data`.
 *
 * @param userId - ID of the user, used as a path segment of the download URL.
 * @param filepath - Path of the file; URI-encoded so it travels as a single path segment.
 */
export const getFileDownload = async (userId: string, filepath: string): Promise<AxiosResponse> => {
  const encodedFilePath = encodeURIComponent(filepath);
  return request.getResponse(`${endpoints.files()}/download/${userId}/${encodedFilePath}`, {
    responseType: 'blob',
  });
};
|
||||
|
|
|
|||
|
|
@ -8,6 +8,10 @@ async function _get<T>(url: string, options?: AxiosRequestConfig): Promise<T> {
|
|||
return response.data;
|
||||
}
|
||||
|
||||
/**
 * Issues a GET request and resolves with the value `axios.get` resolves with,
 * i.e. the response itself is returned instead of being unwrapped to `.data`.
 *
 * @param url - Request URL.
 * @param options - Optional axios request config; shallow-copied before being passed on.
 */
async function _getResponse<T>(url: string, options?: AxiosRequestConfig): Promise<T> {
  return await axios.get(url, { ...options });
}
|
||||
|
||||
async function _post(url: string, data?: any) {
|
||||
const response = await axios.post(url, JSON.stringify(data), {
|
||||
headers: { 'Content-Type': 'application/json' },
|
||||
|
|
@ -114,6 +118,7 @@ axios.interceptors.response.use(
|
|||
|
||||
export default {
|
||||
get: _get,
|
||||
getResponse: _getResponse,
|
||||
post: _post,
|
||||
postMultiPart: _postMultiPart,
|
||||
put: _put,
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import type { OpenAPIV3 } from 'openapi-types';
|
||||
import type { TFile } from './files';
|
||||
|
||||
export type Schema = OpenAPIV3.SchemaObject & { description?: string };
|
||||
export type Reference = OpenAPIV3.ReferenceObject & { description?: string };
|
||||
|
|
@ -131,7 +132,7 @@ export type ToolCallsStepDetails = {
|
|||
type: 'tool_calls'; // Always 'tool_calls'.
|
||||
};
|
||||
|
||||
export type ImageFile = {
|
||||
export type ImageFile = TFile & {
|
||||
/**
|
||||
* The [File](https://platform.openai.com/docs/api-reference/files) ID of the image
|
||||
* in the message content.
|
||||
|
|
@ -267,6 +268,8 @@ export type TContentData = StreamContentData & {
|
|||
|
||||
export const actionDelimiter = '_action_';
|
||||
export const actionDomainSeparator = '---';
|
||||
/** Suffix appended to an OpenAI `file_id` to form the `file_id` of the converted image record saved by `processOpenAIImageOutput`. */
export const hostImageIdSuffix = '_host_copy';
|
||||
/** Prefix prepended to the original filename to form the `filename` of the converted image record saved by `processOpenAIImageOutput`. */
export const hostImageNamePrefix = 'host_copy_';
|
||||
|
||||
export enum AuthTypeEnum {
|
||||
ServiceHttp = 'service_http',
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue