⬇️ feat: Assistant File Downloads (#2234)

* WIP: basic route for file downloads and file strategy for generating a readable stream to pipe as `res`
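
A minimal sketch of the route this describes, assuming an Express handler and a strategy-provided `getDownloadStream` that resolves a Node.js `Readable` (the helper and `app.locals` names here are assumptions, not necessarily the actual implementation):

```js
const express = require('express');
// assumed strategy lookup; the repo resolves per-storage handlers this way
const { getStrategyFunctions } = require('~/server/services/Files/strategies');

const router = express.Router();

router.get('/download/:userId/:file_id', async (req, res) => {
  const { file_id } = req.params;
  const { getDownloadStream } = getStrategyFunctions(req.app.locals.fileStrategy);
  const stream = await getDownloadStream(file_id); // Node.js Readable
  res.setHeader('Content-Disposition', `attachment; filename="${file_id}"`);
  stream.pipe(res); // pipe the readable stream straight into the HTTP response
});

module.exports = router;
```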

* chore(DALLE3): add typing for OpenAI client
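
The typing here presumably amounts to a JSDoc annotation on the client; a sketch (the env var name is an assumption):

```js
const OpenAI = require('openai');

/** @type {OpenAI} */
const openai = new OpenAI({ apiKey: process.env.DALLE3_API_KEY }); // env var name assumed
// editors and `tsc --checkJs` can now verify calls like openai.images.generate(...)
```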

* chore: add `CONSOLE_JSON` notes to dotenv.md
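
For reference, `CONSOLE_JSON` switches console output to structured JSON lines (useful for log aggregators). In `.env`:

```bash
# emit console logs as JSON instead of the human-readable format
CONSOLE_JSON=true
```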

* WIP: first pass OpenAI Assistants File Output handling

* feat: first pass assistants output file download from openai
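
Under the hood this likely reduces to the Node SDK's `files.content`, which returns a fetch `Response`; a hedged sketch of the retrieval step:

```js
const OpenAI = require('openai');

const openai = new OpenAI(); // reads OPENAI_API_KEY from the environment

/** Fetch the raw bytes of an assistants output file by its file_id. */
async function downloadOutputFile(file_id) {
  const response = await openai.files.content(file_id);
  return Buffer.from(await response.arrayBuffer());
}
```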

* chore: add yml vs. yaml variation to `.gitignore` for `librechat.yml`
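
That is, keeping the user config untracked under either extension:

```gitignore
librechat.yaml
librechat.yml
```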

* refactor(retrieveAndProcessFile): remove redundancies

* fix(syncMessages): explicit sort of apiMessages to fix message order on abort

* chore: add logs for warnings and errors, show toast on frontend

* chore: add logger where console was still being used
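
The swap in question, assuming the shared winston logger exported from `~/config` in this codebase:

```js
const { logger } = require('~/config');

function onRetrievalError(error) {
  // before: console.error('[processMessages] error retrieving file', error);
  logger.error('[processMessages] error retrieving file', error);
}
```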
Danny Avila 2024-03-29 08:23:38 -04:00 committed by GitHub
parent 7945fea0f9
commit a00756c469
27 changed files with 555 additions and 248 deletions


@@ -2,10 +2,9 @@ const path = require('path');
 const { v4 } = require('uuid');
 const {
   Constants,
-  FilePurpose,
   ContentTypes,
-  imageExtRegex,
   EModelEndpoint,
+  AnnotationTypes,
   defaultOrderQuery,
 } = require('librechat-data-provider');
 const { retrieveAndProcessFile } = require('~/server/services/Files/process');
@@ -434,13 +433,15 @@ async function checkMessageGaps({ openai, latestMessageId, thread_id, run_id, co
   }

   let addedCurrentMessage = false;
-  const apiMessages = response.data.map((msg) => {
-    if (msg.id === currentMessage.id) {
-      addedCurrentMessage = true;
-      return currentMessage;
-    }
-    return msg;
-  });
+  const apiMessages = response.data
+    .map((msg) => {
+      if (msg.id === currentMessage.id) {
+        addedCurrentMessage = true;
+        return currentMessage;
+      }
+      return msg;
+    })
+    .sort((a, b) => new Date(a.created_at) - new Date(b.created_at));

   if (!addedCurrentMessage) {
     apiMessages.push(currentMessage);
@@ -496,6 +497,44 @@ const recordUsage = async ({
   );
 };

+/**
+ * Safely replaces the annotated text within the specified range denoted by start_index and end_index,
+ * after verifying that the text within that range matches the given annotation text.
+ * Proceeds with the replacement even if a mismatch is found, but logs a warning.
+ *
+ * @param {string} originalText The original text content.
+ * @param {number} start_index The starting index where replacement should begin.
+ * @param {number} end_index The ending index where replacement should end.
+ * @param {string} expectedText The text expected to be found in the specified range.
+ * @param {string} replacementText The text to insert in place of the existing content.
+ * @returns {string} The text with the replacement applied, regardless of text match.
+ */
+function replaceAnnotation(originalText, start_index, end_index, expectedText, replacementText) {
+  if (start_index < 0 || end_index > originalText.length || start_index > end_index) {
+    logger.warn(`Invalid range specified for annotation replacement.
+    Attempting replacement with \`replace\` method instead...
+    length: ${originalText.length}
+    start_index: ${start_index}
+    end_index: ${end_index}`);
+    return originalText.replace(expectedText, replacementText);
+  }
+
+  const actualTextInRange = originalText.substring(start_index, end_index);
+
+  if (actualTextInRange !== expectedText) {
+    logger.warn(`The text within the specified range does not match the expected annotation text.
+    Attempting replacement with \`replace\` method instead...
+    Expected: ${expectedText}
+    Actual: ${actualTextInRange}`);
+    return originalText.replace(expectedText, replacementText);
+  }
+
+  const beforeText = originalText.substring(0, start_index);
+  const afterText = originalText.substring(end_index);
+  return beforeText + replacementText + afterText;
+}
+
 /**
  * Sorts, processes, and flattens messages to a single string.
  *
@@ -509,89 +548,90 @@ async function processMessages({ openai, client, messages = [] }) {
   const sorted = messages.sort((a, b) => a.created_at - b.created_at);

   let text = '';
+  let edited = false;
   for (const message of sorted) {
     message.files = [];
     for (const content of message.content) {
-      const processImageFile =
-        content.type === 'image_file' && !client.processedFileIds.has(content.image_file?.file_id);
-      if (processImageFile) {
-        const { file_id } = content.image_file;
+      const type = content.type;
+      const contentType = content[type];
+      const currentFileId = contentType?.file_id;
+
+      if (type === ContentTypes.IMAGE_FILE && !client.processedFileIds.has(currentFileId)) {
         const file = await retrieveAndProcessFile({
           openai,
           client,
-          file_id,
-          basename: `${file_id}.png`,
+          file_id: currentFileId,
+          basename: `${currentFileId}.png`,
         });
-        client.processedFileIds.add(file_id);
+        client.processedFileIds.add(currentFileId);
         message.files.push(file);
         continue;
       }

-      text += (content.text?.value ?? '') + ' ';
-      logger.debug('[processMessages] Processing message:', { value: text });
+      let currentText = contentType?.value ?? '';
+
+      /** @type {{ annotations: Annotation[] }} */
+      const { annotations } = contentType ?? {};

       // Process annotations if they exist
-      if (!content.text?.annotations?.length) {
+      if (!annotations?.length) {
+        text += currentText + ' ';
         continue;
       }

-      logger.debug('[processMessages] Processing annotations:', content.text.annotations);
-      for (const annotation of content.text.annotations) {
-        logger.debug('Current annotation:', annotation);
+      logger.debug('[processMessages] Processing annotations:', annotations);
+      for (const annotation of annotations) {
         let file;
-        const processFilePath =
-          annotation.file_path && !client.processedFileIds.has(annotation.file_path?.file_id);
+        const type = annotation.type;
+        const annotationType = annotation[type];
+        const file_id = annotationType?.file_id;
+        const alreadyProcessed = client.processedFileIds.has(file_id);

-        if (processFilePath) {
-          const basename = imageExtRegex.test(annotation.text)
-            ? path.basename(annotation.text)
-            : null;
+        if (alreadyProcessed) {
+          const { file_id } = annotationType || {};
+          file = await retrieveAndProcessFile({ openai, client, file_id, unknownType: true });
+        } else if (type === AnnotationTypes.FILE_PATH) {
+          const basename = path.basename(annotation.text);
           file = await retrieveAndProcessFile({
             openai,
             client,
-            file_id: annotation.file_path.file_id,
+            file_id,
             basename,
           });
-          client.processedFileIds.add(annotation.file_path.file_id);
-        }
-
-        const processFileCitation =
-          annotation.file_citation &&
-          !client.processedFileIds.has(annotation.file_citation?.file_id);
-
-        if (processFileCitation) {
+        } else if (type === AnnotationTypes.FILE_CITATION) {
           file = await retrieveAndProcessFile({
             openai,
             client,
-            file_id: annotation.file_citation.file_id,
+            file_id,
             unknownType: true,
           });
-          client.processedFileIds.add(annotation.file_citation.file_id);
         }

-        if (!file && (annotation.file_path || annotation.file_citation)) {
-          const { file_id } = annotation.file_citation || annotation.file_path || {};
-          file = await retrieveAndProcessFile({ openai, client, file_id, unknownType: true });
-          client.processedFileIds.add(file_id);
+        if (file?.filepath) {
+          currentText = replaceAnnotation(
+            currentText,
+            annotation.start_index,
+            annotation.end_index,
+            annotation.text,
+            file.filepath,
+          );
+          edited = true;
         }

+        text += currentText + ' ';
+
         if (!file) {
           continue;
         }

-        if (file.purpose && file.purpose === FilePurpose.Assistants) {
-          text = text.replace(annotation.text, file.filename);
-        } else if (file.filepath) {
-          text = text.replace(annotation.text, file.filepath);
-        }
-
+        client.processedFileIds.add(file_id);
         message.files.push(file);
       }
     }
   }

-  return { messages: sorted, text };
+  return { messages: sorted, text, edited };
 }
module.exports = {