🗃️ feat: General File Support for OpenAI, Azure, Custom, Anthropic and Google (RAG) (#2143)

* refactor: re-purpose `resendImages` as `resendFiles`

* refactor: re-purpose `resendImages` as `resendFiles`

* feat: upload general files

* feat: embed file during upload

* feat: delete file embeddings on file deletion

* chore(fileConfig): add epub+zip type

* feat(encodeAndFormat): handle non-image files

* feat(createContextHandlers): build context prompt from file attachments and successful RAG

* fix: prevent non-temp files as well as embedded files from being deleted on new conversation

* fix: remove temp_file_id on usage, prevent non-temp files as well as embedded files from being deleted on new conversation

* fix: prevent non-temp files as well as embedded files from being deleted on new conversation

* feat(OpenAI/Anthropic/Google): basic RAG support

* fix: delete `resendFiles` only when true (Default)

* refactor(RAG): update endpoints and pass JWT

* fix(resendFiles): default values

* fix(context/processFile): query unique ids only

* feat: rag-api.yaml

* feat: improved file upload UX for longer uploads

* chore: await embed call and catch embedding errors

* refactor: store augmentedPrompt in Client

* refactor(processFileUpload): throw error if `RAG_API_URL` is not set and the upload is not an assistant file upload

* fix(useFileHandling): handle markdown empty mimetype issue

* chore: necessary compose file changes
This commit is contained in:
Danny Avila 2024-03-19 20:54:30 -04:00 committed by GitHub
parent af347cccde
commit f7761df52c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
38 changed files with 683 additions and 261 deletions

View file

@ -1,10 +1,10 @@
const buildOptions = (endpoint, parsedBody) => {
const { modelLabel, promptPrefix, resendImages, ...rest } = parsedBody;
const { modelLabel, promptPrefix, resendFiles, ...rest } = parsedBody;
const endpointOption = {
endpoint,
modelLabel,
promptPrefix,
resendImages,
resendFiles,
modelOptions: {
...rest,
},

View file

@ -1,11 +1,11 @@
const buildOptions = (endpoint, parsedBody, endpointType) => {
const { chatGptLabel, promptPrefix, resendImages, imageDetail, ...rest } = parsedBody;
const { chatGptLabel, promptPrefix, resendFiles, imageDetail, ...rest } = parsedBody;
const endpointOption = {
endpoint,
endpointType,
chatGptLabel,
promptPrefix,
resendImages,
resendFiles,
imageDetail,
modelOptions: {
...rest,

View file

@ -1,10 +1,10 @@
const buildOptions = (endpoint, parsedBody) => {
const { chatGptLabel, promptPrefix, resendImages, imageDetail, ...rest } = parsedBody;
const { chatGptLabel, promptPrefix, resendFiles, imageDetail, ...rest } = parsedBody;
const endpointOption = {
endpoint,
chatGptLabel,
promptPrefix,
resendImages,
resendFiles,
imageDetail,
modelOptions: {
...rest,

View file

@ -1,3 +1,6 @@
const fs = require('fs');
const path = require('path');
const axios = require('axios');
const fetch = require('node-fetch');
const { ref, uploadBytes, getDownloadURL, deleteObject } = require('firebase/storage');
const { getBufferMetadata } = require('~/server/utils');
@ -160,6 +163,18 @@ function extractFirebaseFilePath(urlString) {
* Throws an error if there is an issue with deletion.
*/
const deleteFirebaseFile = async (req, file) => {
if (file.embedded && process.env.RAG_API_URL) {
const jwtToken = req.headers.authorization.split(' ')[1];
axios.delete(`${process.env.RAG_API_URL}/documents`, {
headers: {
Authorization: `Bearer ${jwtToken}`,
'Content-Type': 'application/json',
accept: 'application/json',
},
data: [file.file_id],
});
}
const fileName = extractFirebaseFilePath(file.filepath);
if (!fileName.includes(req.user.id)) {
throw new Error('Invalid file path');
@ -167,10 +182,41 @@ const deleteFirebaseFile = async (req, file) => {
await deleteFile('', fileName);
};
/**
 * Moves a multer-uploaded file into Firebase Storage.
 *
 * The file at `file.path` is read fully into memory, handed to `saveBufferToFirebase` under the
 * name `<file_id>__<basename of file.path>` for the requesting user, and then the file at
 * `file.path` is unlinked. The unlink only runs after the save call has resolved, so a rejected
 * save leaves the file at `file.path` in place.
 *
 * @param {Object} params - The params object.
 * @param {Express.Request} params.req - The Express request; `req.user.id` is passed on as the `userId`.
 * @param {Express.Multer.File} params.file - The multer file; only its `path` property is read here.
 * @param {string} params.file_id - The file ID, used as the prefix of the stored file name.
 *
 * @returns {Promise<{ filepath: string, bytes: number }>}
 * A promise that resolves to an object containing:
 * - filepath: The value resolved by `saveBufferToFirebase` (the download URL of the uploaded file).
 * - bytes: The size of the uploaded file in bytes.
 */
async function uploadFileToFirebase({ req, file, file_id }) {
  const { path: tempPath } = file;
  const contents = await fs.promises.readFile(tempPath);

  const filepath = await saveBufferToFirebase({
    userId: req.user.id,
    buffer: contents,
    fileName: `${file_id}__${path.basename(tempPath)}`,
  });

  // The contents are already in memory and have been handed to storage, so the file on disk can go.
  await fs.promises.unlink(tempPath);

  return { filepath, bytes: Buffer.byteLength(contents) };
}
// Firebase storage helpers exported by this module.
module.exports = {
deleteFile,
getFirebaseURL,
saveURLToFirebase,
deleteFirebaseFile,
uploadFileToFirebase,
saveBufferToFirebase,
};

View file

@ -188,7 +188,26 @@ const isValidPath = (req, base, subfolder, filepath) => {
* file path is invalid or if there is an error in deletion.
*/
const deleteLocalFile = async (req, file) => {
const { publicPath } = req.app.locals.paths;
const { publicPath, uploads } = req.app.locals.paths;
if (file.embedded && process.env.RAG_API_URL) {
const jwtToken = req.headers.authorization.split(' ')[1];
axios.delete(`${process.env.RAG_API_URL}/documents`, {
headers: {
Authorization: `Bearer ${jwtToken}`,
'Content-Type': 'application/json',
accept: 'application/json',
},
data: [file.file_id],
});
}
if (file.filepath.startsWith(`/uploads/${req.user.id}`)) {
const basePath = file.filepath.split('/uploads/')[1];
const filepath = path.join(uploads, basePath);
await fs.promises.unlink(filepath);
return;
}
const parts = file.filepath.split(path.sep);
const subfolder = parts[1];
const filepath = path.join(publicPath, file.filepath);
@ -200,6 +219,42 @@ const deleteLocalFile = async (req, file) => {
await fs.promises.unlink(filepath);
};
/**
 * Uploads a file to the specified upload directory.
 *
 * The file at `file.path` is read fully into memory and written to
 * `<uploads>/<user id>/<file_id>__<basename of file.path>`. The per-user directory is created
 * when it does not exist yet. The source file at `file.path` is not removed by this function.
 *
 * @param {Object} params - The params object.
 * @param {Object} params.req - The request object from Express. It should have a `user` property with an `id`
 * representing the user, and an `app.locals.paths` object with an `uploads` path.
 * @param {Express.Multer.File} params.file - The file object, which is part of the request. The file object should
 * have a `path` property that points to the location of the uploaded file.
 * @param {string} params.file_id - The file ID.
 *
 * @returns {Promise<{ filepath: string, bytes: number }>}
 * A promise that resolves to an object containing:
 * - filepath: The POSIX-style path `/uploads/<user id>/<stored file name>` where the file is saved.
 * - bytes: The size of the file in bytes.
 * @throws Rejects with the underlying `fs` error if reading, creating the directory, or writing fails.
 */
async function uploadLocalFile({ req, file, file_id }) {
  const inputFilePath = file.path;
  const inputBuffer = await fs.promises.readFile(inputFilePath);
  const bytes = Buffer.byteLength(inputBuffer);

  const { uploads } = req.app.locals.paths;
  const userPath = path.join(uploads, req.user.id);

  // With `recursive: true`, mkdir succeeds when the directory already exists, so no separate
  // existence check is needed. Awaiting it keeps the event loop free, unlike the sync calls.
  await fs.promises.mkdir(userPath, { recursive: true });

  const fileName = `${file_id}__${path.basename(inputFilePath)}`;
  const newPath = path.join(userPath, fileName);
  await fs.promises.writeFile(newPath, inputBuffer);

  // Always built with forward slashes, regardless of the host platform's separator.
  const filepath = path.posix.join('/', 'uploads', req.user.id, path.basename(newPath));
  return { filepath, bytes };
}
module.exports = {
saveLocalFile,
saveLocalImage,
@ -207,4 +262,5 @@ module.exports = {
saveFileFromURL,
getLocalFileURL,
deleteLocalFile,
uploadLocalFile,
};

View file

@ -6,13 +6,14 @@ const { logger } = require('~/config');
/**
* Uploads a file that can be used across various OpenAI services.
*
* @param {Express.Request} req - The request object from Express. It should have a `user` property with an `id`
* @param {Object} params - The params object.
* @param {Express.Request} params.req - The request object from Express. It should have a `user` property with an `id`
* representing the user, and an `app.locals.paths` object with an `imageOutput` path.
* @param {Express.Multer.File} file - The file uploaded to the server via multer.
* @param {OpenAIClient} openai - The initialized OpenAI client.
* @param {Express.Multer.File} params.file - The file uploaded to the server via multer.
* @param {OpenAIClient} params.openai - The initialized OpenAI client.
* @returns {Promise<OpenAIFile>}
*/
async function uploadOpenAIFile(req, file, openai) {
async function uploadOpenAIFile({ req, file, openai }) {
const uploadedFile = await openai.files.create({
file: fs.createReadStream(file.path),
purpose: FilePurpose.Assistants,

View file

@ -39,6 +39,11 @@ async function encodeAndFormat(req, files, endpoint) {
for (let file of files) {
const source = file.source ?? FileSources.local;
if (!file.height) {
promises.push([file, null]);
continue;
}
if (!encodingMethods[source]) {
const { prepareImagePayload } = getStrategyFunctions(source);
if (!prepareImagePayload) {
@ -70,6 +75,24 @@ async function encodeAndFormat(req, files, endpoint) {
};
for (const [file, imageContent] of formattedImages) {
const fileMetadata = {
type: file.type,
file_id: file.file_id,
filepath: file.filepath,
filename: file.filename,
embedded: !!file.embedded,
};
if (file.height && file.width) {
fileMetadata.height = file.height;
fileMetadata.width = file.width;
}
if (!imageContent) {
result.files.push(fileMetadata);
continue;
}
const imagePart = {
type: 'image_url',
image_url: {
@ -93,15 +116,7 @@ async function encodeAndFormat(req, files, endpoint) {
}
result.image_urls.push(imagePart);
result.files.push({
file_id: file.file_id,
// filepath: file.filepath,
// filename: file.filename,
// type: file.type,
// height: file.height,
// width: file.width,
});
result.files.push(fileMetadata);
}
return result;
}

View file

@ -1,5 +1,6 @@
const path = require('path');
const { v4 } = require('uuid');
const axios = require('axios');
const mime = require('mime/lite');
const {
isUUID,
@ -189,12 +190,14 @@ const processImageFile = async ({ req, res, file, metadata }) => {
const source = req.app.locals.fileStrategy;
const { handleImageUpload } = getStrategyFunctions(source);
const { file_id, temp_file_id, endpoint } = metadata;
const { filepath, bytes, width, height } = await handleImageUpload({
req,
file,
file_id,
endpoint,
});
const result = await createFile(
{
user: req.user.id,
@ -266,13 +269,46 @@ const processFileUpload = async ({ req, res, file, metadata }) => {
const { handleFileUpload } = getStrategyFunctions(source);
const { file_id, temp_file_id } = metadata;
let embedded = false;
if (process.env.RAG_API_URL) {
try {
const jwtToken = req.headers.authorization.split(' ')[1];
const filepath = `./uploads/temp/${file.path.split('uploads/temp/')[1]}`;
const response = await axios.post(
`${process.env.RAG_API_URL}/embed`,
{
filename: file.originalname,
file_content_type: file.mimetype,
filepath,
file_id,
},
{
headers: {
Authorization: `Bearer ${jwtToken}`,
'Content-Type': 'application/json',
},
},
);
if (response.status === 200) {
embedded = true;
}
} catch (error) {
logger.error('Error embedding file', error);
throw new Error(error);
}
} else if (!isAssistantUpload) {
logger.error('RAG_API_URL not set, cannot support process file upload');
throw new Error('RAG_API_URL not set, cannot support process file upload');
}
/** @type {OpenAI | undefined} */
let openai;
if (source === FileSources.openai) {
({ openai } = await initializeClient({ req }));
}
const { id, bytes, filename, filepath } = await handleFileUpload(req, file, openai);
const { id, bytes, filename, filepath } = await handleFileUpload({ req, file, file_id, openai });
if (isAssistantUpload && !metadata.message_file) {
await openai.beta.assistants.files.create(metadata.assistant_id, {
@ -289,8 +325,9 @@ const processFileUpload = async ({ req, res, file, metadata }) => {
filepath: isAssistantUpload ? `${openai.baseURL}/files/${id}` : filepath,
filename: filename ?? file.originalname,
context: isAssistantUpload ? FileContext.assistants : FileContext.message_attachment,
source,
type: file.mimetype,
embedded,
source,
},
true,
);

View file

@ -5,6 +5,7 @@ const {
saveURLToFirebase,
deleteFirebaseFile,
saveBufferToFirebase,
uploadFileToFirebase,
uploadImageToFirebase,
processFirebaseAvatar,
} = require('./Firebase');
@ -14,6 +15,7 @@ const {
saveFileFromURL,
saveLocalBuffer,
deleteLocalFile,
uploadLocalFile,
uploadLocalImage,
prepareImagesLocal,
processLocalAvatar,
@ -32,6 +34,7 @@ const firebaseStrategy = () => ({
saveBuffer: saveBufferToFirebase,
prepareImagePayload: prepareImageURL,
processAvatar: processFirebaseAvatar,
handleFileUpload: uploadFileToFirebase,
handleImageUpload: uploadImageToFirebase,
});
@ -46,6 +49,7 @@ const localStrategy = () => ({
saveBuffer: saveLocalBuffer,
deleteFile: deleteLocalFile,
processAvatar: processLocalAvatar,
handleFileUpload: uploadLocalFile,
handleImageUpload: uploadLocalImage,
prepareImagePayload: prepareImagesLocal,
});