📜 fix: Better OpenAI Assistants Annotation Processing (#3565)

* fix: correctly handle openai assistants annotations

* fix: Handle adjacent identical citations only for our specific format

* fix: correctly handle adjacent identical citations

* refactor: make regex handling more robust

* fix: skip annotation condition, make regex handling more robust

* refactor: Handle FILE_PATH and FILE_CITATION annotation types in processMessages.spec.js

* ci: unit tests for real file path type annotations
This commit is contained in:
Danny Avila 2024-08-06 18:52:58 -04:00 committed by GitHub
parent c2a79aee1b
commit 270c6d2350
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 1097 additions and 123 deletions

View file

@ -11,7 +11,6 @@ const { recordMessage, getMessages } = require('~/models/Message');
const { saveConvo } = require('~/models/Conversation');
const spendTokens = require('~/models/spendTokens');
const { countTokens } = require('~/server/utils');
const { logger } = require('~/config');
/**
* Initializes a new thread or adds messages to an existing thread.
@ -516,80 +515,34 @@ const recordUsage = async ({
);
};
/**
* Creates a replaceAnnotation function with internal state for tracking the index offset.
*
* @returns {function} The replaceAnnotation function with closure for index offset.
const uniqueCitationStart = '^====||===';
const uniqueCitationEnd = '==|||||^';
/** Helper function to escape special characters in regex
* @param {string} string - The string to escape.
* @returns {string} The escaped string.
*/
function createReplaceAnnotation() {
let indexOffset = 0;
/**
 * Replaces one annotated span of `currentText` with `replacementText`.
 *
 * The span is addressed by `start_index`/`end_index`, shifted by the running `indexOffset`
 * held in the enclosing closure (the net length change of earlier in-range replacements).
 * When the shifted range is invalid, or the text found there is not `expectedText`, the
 * first occurrence of `expectedText` is replaced instead. A warning is logged only for the
 * invalid-range case, and neither fallback adjusts `indexOffset`.
 *
 * @param {object} params The replacement parameters.
 * @param {string} params.currentText The current text content, with/without replacements.
 * @param {number} params.start_index The starting index where replacement should begin.
 * @param {number} params.end_index The ending index where replacement should end.
 * @param {string} params.expectedText The text expected to be found in the specified range.
 * @param {string} params.replacementText The text to insert in place of the existing content.
 * @returns {string} The text with the replacement applied; unchanged if the fallback finds
 * no occurrence of `expectedText`.
 */
function replaceAnnotation({
  currentText,
  start_index,
  end_index,
  expectedText,
  replacementText,
}) {
  const adjustedStartIndex = start_index + indexOffset;
  const adjustedEndIndex = end_index + indexOffset;

  // A replacer callback makes `String.prototype.replace` insert the value verbatim. With a
  // plain string, sequences such as `$&`, `$'` or `$1` inside a file path would be expanded
  // as replacement patterns and corrupt the output.
  const replaceFirstOccurrence = () => currentText.replace(expectedText, () => replacementText);

  if (
    adjustedStartIndex < 0 ||
    adjustedEndIndex > currentText.length ||
    adjustedStartIndex > adjustedEndIndex
  ) {
    logger.warn(`Invalid range specified for annotation replacement.
Attempting replacement with \`replace\` method instead...
length: ${currentText.length}
start_index: ${adjustedStartIndex}
end_index: ${adjustedEndIndex}`);
    return replaceFirstOccurrence();
  }

  // The range is valid but does not contain the annotation text: fall back to a search.
  if (currentText.substring(adjustedStartIndex, adjustedEndIndex) !== expectedText) {
    return replaceFirstOccurrence();
  }

  // Record the length change so later annotations (still expressed in original
  // coordinates) land on the right span.
  indexOffset += replacementText.length - (adjustedEndIndex - adjustedStartIndex);
  return (
    currentText.slice(0, adjustedStartIndex) +
    replacementText +
    currentText.slice(adjustedEndIndex)
  );
}
return replaceAnnotation;
function escapeRegExp(string) {
  // Characters that carry special meaning inside a regular expression.
  const regexMetaChars = /[.*+?^${}()|[\]\\]/g;
  // `$&` re-inserts the matched character, so each one ends up prefixed by a backslash.
  const escaped = string.replace(regexMetaChars, '\\$&');
  return escaped;
}
/**
* Sorts, processes, and flattens messages to a single string.
*
* @param {object} params - The OpenAI client instance.
* @param {object} params - The parameters for processing messages.
* @param {OpenAIClient} params.openai - The OpenAI client instance.
* @param {RunClient} params.client - The LibreChat client that manages the run: either refers to `OpenAI` or `StreamRunManager`.
* @param {ThreadMessage[]} params.messages - An array of messages.
* @returns {Promise<{messages: ThreadMessage[], text: string}>} The sorted messages and the flattened text.
* @returns {Promise<{messages: ThreadMessage[], text: string, edited: boolean}>} The sorted messages, the flattened text, and whether it was edited.
*/
async function processMessages({ openai, client, messages = [] }) {
const sorted = messages.sort((a, b) => a.created_at - b.created_at);
let text = '';
let edited = false;
const sources = [];
const sources = new Map();
const fileRetrievalPromises = [];
for (const message of sorted) {
message.files = [];
for (const content of message.content) {
@ -598,15 +551,21 @@ async function processMessages({ openai, client, messages = [] }) {
const currentFileId = contentType?.file_id;
if (type === ContentTypes.IMAGE_FILE && !client.processedFileIds.has(currentFileId)) {
const file = await retrieveAndProcessFile({
openai,
client,
file_id: currentFileId,
basename: `${currentFileId}.png`,
});
client.processedFileIds.add(currentFileId);
message.files.push(file);
fileRetrievalPromises.push(
retrieveAndProcessFile({
openai,
client,
file_id: currentFileId,
basename: `${currentFileId}.png`,
})
.then((file) => {
client.processedFileIds.add(currentFileId);
message.files.push(file);
})
.catch((error) => {
console.error(`Failed to retrieve file: ${error.message}`);
}),
);
continue;
}
@ -615,78 +574,110 @@ async function processMessages({ openai, client, messages = [] }) {
/** @type {{ annotations: Annotation[] }} */
const { annotations } = contentType ?? {};
// Process annotations if they exist
if (!annotations?.length) {
text += currentText + ' ';
text += currentText;
continue;
}
const originalText = currentText;
text += originalText;
const replaceAnnotation = createReplaceAnnotation();
logger.debug('[processMessages] Processing annotations:', annotations);
for (const annotation of annotations) {
let file;
const replacements = [];
const annotationPromises = annotations.map(async (annotation) => {
const type = annotation.type;
const annotationType = annotation[type];
const file_id = annotationType?.file_id;
const alreadyProcessed = client.processedFileIds.has(file_id);
const replaceCurrentAnnotation = (replacementText = '') => {
const { start_index, end_index, text: expectedText } = annotation;
currentText = replaceAnnotation({
originalText,
currentText,
start_index,
end_index,
expectedText,
replacementText,
});
edited = true;
};
let file;
let replacementText = '';
if (alreadyProcessed) {
const { file_id } = annotationType || {};
file = await retrieveAndProcessFile({ openai, client, file_id, unknownType: true });
} else if (type === AnnotationTypes.FILE_PATH) {
const basename = path.basename(annotation.text);
file = await retrieveAndProcessFile({
openai,
client,
file_id,
basename,
});
replaceCurrentAnnotation(file.filepath);
} else if (type === AnnotationTypes.FILE_CITATION) {
file = await retrieveAndProcessFile({
openai,
client,
file_id,
unknownType: true,
});
sources.push(file.filename);
replaceCurrentAnnotation(`^${sources.length}^`);
try {
if (alreadyProcessed) {
file = await retrieveAndProcessFile({ openai, client, file_id, unknownType: true });
} else if (type === AnnotationTypes.FILE_PATH) {
const basename = path.basename(annotation.text);
file = await retrieveAndProcessFile({
openai,
client,
file_id,
basename,
});
replacementText = file.filepath;
} else if (type === AnnotationTypes.FILE_CITATION && file_id) {
file = await retrieveAndProcessFile({
openai,
client,
file_id,
unknownType: true,
});
if (file && file.filename) {
if (!sources.has(file.filename)) {
sources.set(file.filename, sources.size + 1);
}
replacementText = `${uniqueCitationStart}${sources.get(
file.filename,
)}${uniqueCitationEnd}`;
}
}
if (file && replacementText) {
replacements.push({
start: annotation.start_index,
end: annotation.end_index,
text: replacementText,
});
edited = true;
if (!alreadyProcessed) {
client.processedFileIds.add(file_id);
message.files.push(file);
}
}
} catch (error) {
console.error(`Failed to process annotation: ${error.message}`);
}
});
text = currentText;
await Promise.all(annotationPromises);
if (!file) {
continue;
}
client.processedFileIds.add(file_id);
message.files.push(file);
// Apply replacements in reverse order
replacements.sort((a, b) => b.start - a.start);
for (const { start, end, text: replacementText } of replacements) {
currentText = currentText.slice(0, start) + replacementText + currentText.slice(end);
}
text += currentText;
}
}
if (sources.length) {
await Promise.all(fileRetrievalPromises);
// Handle adjacent identical citations with the unique format
const adjacentCitationRegex = new RegExp(
`${escapeRegExp(uniqueCitationStart)}(\\d+)${escapeRegExp(
uniqueCitationEnd,
)}(\\s*)${escapeRegExp(uniqueCitationStart)}(\\d+)${escapeRegExp(uniqueCitationEnd)}`,
'g',
);
text = text.replace(adjacentCitationRegex, (match, num1, space, num2) => {
return num1 === num2
? `${uniqueCitationStart}${num1}${uniqueCitationEnd}`
: `${uniqueCitationStart}${num1}${uniqueCitationEnd}${space}${uniqueCitationStart}${num2}${uniqueCitationEnd}`;
});
// Remove any remaining adjacent identical citations
const remainingAdjacentRegex = new RegExp(
`(${escapeRegExp(uniqueCitationStart)}(\\d+)${escapeRegExp(uniqueCitationEnd)})\\s*\\1+`,
'g',
);
text = text.replace(remainingAdjacentRegex, '$1');
// Replace the unique citation format with the final format
text = text.replace(new RegExp(escapeRegExp(uniqueCitationStart), 'g'), '^');
text = text.replace(new RegExp(escapeRegExp(uniqueCitationEnd), 'g'), '^');
if (sources.size) {
text += '\n\n';
for (let i = 0; i < sources.length; i++) {
text += `^${i + 1}.^ ${sources[i]}${i === sources.length - 1 ? '' : '\n'}`;
}
Array.from(sources.entries()).forEach(([source, index], arrayIndex) => {
text += `^${index}.^ ${source}${arrayIndex === sources.size - 1 ? '' : '\n'}`;
});
}
return { messages: sorted, text, edited };

View file

@ -0,0 +1,983 @@
const { retrieveAndProcessFile } = require('~/server/services/Files/process');
const { processMessages } = require('./manage');
jest.mock('~/server/services/Files/process', () => ({
retrieveAndProcessFile: jest.fn(),
}));
describe('processMessages', () => {
let openai, client;
beforeEach(() => {
openai = {};
client = {
processedFileIds: new Set(),
};
jest.clearAllMocks();
retrieveAndProcessFile.mockReset();
});
test('handles normal case with single source', async () => {
const messages = [
{
content: [
{
type: 'text',
text: {
value: 'This is a test ^1^ and another^1^',
annotations: [
{
type: 'file_citation',
start_index: 15,
end_index: 18,
file_citation: { file_id: 'file1' },
},
{
type: 'file_citation',
start_index: 30,
end_index: 33,
file_citation: { file_id: 'file1' },
},
],
},
},
],
created_at: 1,
},
];
retrieveAndProcessFile.mockResolvedValue({ filename: 'test.txt' });
const result = await processMessages({ openai, client, messages });
expect(result.text).toBe('This is a test ^1^ and another^1^\n\n^1.^ test.txt');
expect(result.edited).toBe(true);
});
test('handles multiple different sources', async () => {
const messages = [
{
content: [
{
type: 'text',
text: {
value: 'This is a test ^1^ and another^2^',
annotations: [
{
type: 'file_citation',
start_index: 15,
end_index: 18,
file_citation: { file_id: 'file1' },
},
{
type: 'file_citation',
start_index: 30,
end_index: 33,
file_citation: { file_id: 'file2' },
},
],
},
},
],
created_at: 1,
},
];
retrieveAndProcessFile
.mockResolvedValueOnce({ filename: 'test1.txt' })
.mockResolvedValueOnce({ filename: 'test2.txt' });
const result = await processMessages({ openai, client, messages });
expect(result.text).toBe('This is a test ^1^ and another^2^\n\n^1.^ test1.txt\n^2.^ test2.txt');
expect(result.edited).toBe(true);
});
test('handles file retrieval failure', async () => {
const messages = [
{
content: [
{
type: 'text',
text: {
value: 'This is a test ^1^',
annotations: [
{
type: 'file_citation',
start_index: 15,
end_index: 18,
file_citation: { file_id: 'file1' },
},
],
},
},
],
created_at: 1,
},
];
retrieveAndProcessFile.mockRejectedValue(new Error('File not found'));
const result = await processMessages({ openai, client, messages });
expect(result.text).toBe('This is a test ^1^');
expect(result.edited).toBe(false);
});
test('handles citations without file ids', async () => {
const messages = [
{
content: [
{
type: 'text',
text: {
value: 'This is a test ^1^',
annotations: [{ type: 'file_citation', start_index: 15, end_index: 18 }],
},
},
],
created_at: 1,
},
];
const result = await processMessages({ openai, client, messages });
expect(result.text).toBe('This is a test ^1^');
expect(result.edited).toBe(false);
});
// Three citation annotations, the middle one without a `file_citation` payload (no file id).
// Only two mock results are queued: the id-less annotation is expected not to trigger a
// file retrieval, so the third annotation receives 'test3.txt'.
// Expected output: the middle `^2^` is the original, untouched text, while the last marker
// becomes `^2^` because 'test3.txt' is the second distinct source.
test('handles mixed valid and invalid citations', async () => {
const messages = [
{
content: [
{
type: 'text',
text: {
value: 'This is a test ^1^ and ^2^ and ^3^',
annotations: [
{
type: 'file_citation',
start_index: 15,
end_index: 18,
file_citation: { file_id: 'file1' },
},
// No `file_citation` object: this annotation has no file id to resolve.
{ type: 'file_citation', start_index: 23, end_index: 26 },
{
type: 'file_citation',
start_index: 31,
end_index: 34,
file_citation: { file_id: 'file3' },
},
],
},
},
],
created_at: 1,
},
];
retrieveAndProcessFile
.mockResolvedValueOnce({ filename: 'test1.txt' })
.mockResolvedValueOnce({ filename: 'test3.txt' });
const result = await processMessages({ openai, client, messages });
expect(result.text).toBe(
'This is a test ^1^ and ^2^ and ^2^\n\n^1.^ test1.txt\n^2.^ test3.txt',
);
expect(result.edited).toBe(true);
});
// Four citations of the same file arranged as two adjacent pairs: one pair with no gap
// (`^1^^1^`) and one pair separated by a single space (`^1^ ^1^`).
// Each pair is expected to collapse into a single `^1^`, including the separating space.
test('handles adjacent identical citations', async () => {
const messages = [
{
content: [
{
type: 'text',
text: {
value: 'This is a test ^1^^1^ and ^1^ ^1^',
annotations: [
{
type: 'file_citation',
start_index: 15,
end_index: 18,
file_citation: { file_id: 'file1' },
},
{
type: 'file_citation',
start_index: 18,
end_index: 21,
file_citation: { file_id: 'file1' },
},
{
type: 'file_citation',
start_index: 26,
end_index: 29,
file_citation: { file_id: 'file1' },
},
{
type: 'file_citation',
start_index: 30,
end_index: 33,
file_citation: { file_id: 'file1' },
},
],
},
},
],
created_at: 1,
},
];
// Every retrieval resolves to the same filename, so all four map to source number 1.
retrieveAndProcessFile.mockResolvedValue({ filename: 'test.txt' });
const result = await processMessages({ openai, client, messages });
expect(result.text).toBe('This is a test ^1^ and ^1^\n\n^1.^ test.txt');
expect(result.edited).toBe(true);
});
test('handles real data with multiple adjacent citations', async () => {
const messages = [
{
id: 'msg_XXXXXXXXXXXXXXXXXXXX',
object: 'thread.message',
created_at: 1722980324,
assistant_id: 'asst_XXXXXXXXXXXXXXXXXXXX',
thread_id: 'thread_XXXXXXXXXXXXXXXXXXXX',
run_id: 'run_XXXXXXXXXXXXXXXXXXXX',
status: 'completed',
incomplete_details: null,
incomplete_at: null,
completed_at: 1722980331,
role: 'assistant',
content: [
{
type: 'text',
text: {
value:
'The text you have uploaded is from the book "Harry Potter and the Philosopher\'s Stone" by J.K. Rowling. It follows the story of a young boy named Harry Potter who discovers that he is a wizard on his eleventh birthday. Here are some key points of the narrative:\n\n1. **Discovery and Invitation to Hogwarts**: Harry learns that he is a wizard and receives an invitation to attend Hogwarts School of Witchcraft and Wizardry【11:2†source】【11:4†source】.\n\n2. **Shopping for Supplies**: Hagrid takes Harry to Diagon Alley to buy his school supplies, including his wand from Ollivander\'s【11:9†source】【11:14†source】.\n\n3. **Introduction to Hogwarts**: Harry is introduced to Hogwarts, the magical school where he will learn about magic and discover more about his own background【11:12†source】【11:18†source】.\n\n4. **Meeting Friends and Enemies**: At Hogwarts, Harry makes friends like Ron Weasley and Hermione Granger, and enemies like Draco Malfoy【11:16†source】.\n\n5. **Uncovering the Mystery**: Harry, along with Ron and Hermione, uncovers the mystery of the Philosopher\'s Stone and its connection to the dark wizard Voldemort【11:1†source】【11:10†source】【11:7†source】.\n\nThese points highlight Harry\'s initial experiences in the magical world and set the stage for his adventures at Hogwarts.',
annotations: [
{
type: 'file_citation',
text: '【11:2†source】',
start_index: 420,
end_index: 433,
file_citation: {
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
},
},
{
type: 'file_citation',
text: '【11:4†source】',
start_index: 433,
end_index: 446,
file_citation: {
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
},
},
{
type: 'file_citation',
text: '【11:9†source】',
start_index: 578,
end_index: 591,
file_citation: {
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
},
},
{
type: 'file_citation',
text: '【11:14†source】',
start_index: 591,
end_index: 605,
file_citation: {
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
},
},
{
type: 'file_citation',
text: '【11:12†source】',
start_index: 767,
end_index: 781,
file_citation: {
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
},
},
{
type: 'file_citation',
text: '【11:18†source】',
start_index: 781,
end_index: 795,
file_citation: {
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
},
},
{
type: 'file_citation',
text: '【11:16†source】',
start_index: 935,
end_index: 949,
file_citation: {
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
},
},
{
type: 'file_citation',
text: '【11:1†source】',
start_index: 1114,
end_index: 1127,
file_citation: {
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
},
},
{
type: 'file_citation',
text: '【11:10†source】',
start_index: 1127,
end_index: 1141,
file_citation: {
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
},
},
{
type: 'file_citation',
text: '【11:7†source】',
start_index: 1141,
end_index: 1154,
file_citation: {
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
},
},
],
},
},
],
attachments: [],
metadata: {},
files: [
{
object: 'file',
id: 'file-XXXXXXXXXXXXXXXXXXXX',
purpose: 'assistants',
filename: 'hp1.txt',
bytes: 439742,
created_at: 1722962139,
status: 'processed',
status_details: null,
type: 'text/plain',
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
filepath:
'https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-XXXXXXXXXXXXXXXXXXXX/hp1.txt',
usage: 1,
user: 'XXXXXXXXXXXXXXXXXXXX',
context: 'assistants',
source: 'openai',
model: 'gpt-4o',
},
],
},
];
retrieveAndProcessFile.mockResolvedValue({ filename: 'hp1.txt' });
const result = await processMessages({
openai: {},
client: { processedFileIds: new Set() },
messages,
});
const expectedText = `The text you have uploaded is from the book "Harry Potter and the Philosopher's Stone" by J.K. Rowling. It follows the story of a young boy named Harry Potter who discovers that he is a wizard on his eleventh birthday. Here are some key points of the narrative:
1. **Discovery and Invitation to Hogwarts**: Harry learns that he is a wizard and receives an invitation to attend Hogwarts School of Witchcraft and Wizardry^1^.
2. **Shopping for Supplies**: Hagrid takes Harry to Diagon Alley to buy his school supplies, including his wand from Ollivander's^1^.
3. **Introduction to Hogwarts**: Harry is introduced to Hogwarts, the magical school where he will learn about magic and discover more about his own background^1^.
4. **Meeting Friends and Enemies**: At Hogwarts, Harry makes friends like Ron Weasley and Hermione Granger, and enemies like Draco Malfoy^1^.
5. **Uncovering the Mystery**: Harry, along with Ron and Hermione, uncovers the mystery of the Philosopher's Stone and its connection to the dark wizard Voldemort^1^.
These points highlight Harry's initial experiences in the magical world and set the stage for his adventures at Hogwarts.
^1.^ hp1.txt`;
expect(result.text).toBe(expectedText);
expect(result.edited).toBe(true);
});
// Same real-world message text as the single-source case, but the citations alternate
// between two distinct files. This exercises what the title promises: sources are numbered
// in order of first appearance, adjacent citations of the SAME source collapse into one
// marker, and adjacent citations of DIFFERENT sources are both kept.
test('handles real data with multiple adjacent citations with multiple sources', async () => {
  const fileA = 'file-XXXXXXXXXXXXXXXXXXXX';
  const fileB = 'file-YYYYYYYYYYYYYYYYYYYY';

  /** Builds a `file_citation` annotation in the shape returned by the Assistants API. */
  const citation = (text, start_index, end_index, file_id) => ({
    type: 'file_citation',
    text,
    start_index,
    end_index,
    file_citation: { file_id },
  });

  const messages = [
    {
      id: 'msg_XXXXXXXXXXXXXXXXXXXX',
      object: 'thread.message',
      created_at: 1722980324,
      assistant_id: 'asst_XXXXXXXXXXXXXXXXXXXX',
      thread_id: 'thread_XXXXXXXXXXXXXXXXXXXX',
      run_id: 'run_XXXXXXXXXXXXXXXXXXXX',
      status: 'completed',
      incomplete_details: null,
      incomplete_at: null,
      completed_at: 1722980331,
      role: 'assistant',
      content: [
        {
          type: 'text',
          text: {
            value:
              'The text you have uploaded is from the book "Harry Potter and the Philosopher\'s Stone" by J.K. Rowling. It follows the story of a young boy named Harry Potter who discovers that he is a wizard on his eleventh birthday. Here are some key points of the narrative:\n\n1. **Discovery and Invitation to Hogwarts**: Harry learns that he is a wizard and receives an invitation to attend Hogwarts School of Witchcraft and Wizardry【11:2†source】【11:4†source】.\n\n2. **Shopping for Supplies**: Hagrid takes Harry to Diagon Alley to buy his school supplies, including his wand from Ollivander\'s【11:9†source】【11:14†source】.\n\n3. **Introduction to Hogwarts**: Harry is introduced to Hogwarts, the magical school where he will learn about magic and discover more about his own background【11:12†source】【11:18†source】.\n\n4. **Meeting Friends and Enemies**: At Hogwarts, Harry makes friends like Ron Weasley and Hermione Granger, and enemies like Draco Malfoy【11:16†source】.\n\n5. **Uncovering the Mystery**: Harry, along with Ron and Hermione, uncovers the mystery of the Philosopher\'s Stone and its connection to the dark wizard Voldemort【11:1†source】【11:10†source】【11:7†source】.\n\nThese points highlight Harry\'s initial experiences in the magical world and set the stage for his adventures at Hogwarts.',
            annotations: [
              // Point 1: A then B -> two different sources, both markers kept.
              citation('【11:2†source】', 420, 433, fileA),
              citation('【11:4†source】', 433, 446, fileB),
              // Point 2: A, A -> collapses to a single marker.
              citation('【11:9†source】', 578, 591, fileA),
              citation('【11:14†source】', 591, 605, fileA),
              // Point 3: B, B -> collapses to a single marker.
              citation('【11:12†source】', 767, 781, fileB),
              citation('【11:18†source】', 781, 795, fileB),
              // Point 4: a lone citation.
              citation('【11:16†source】', 935, 949, fileA),
              // Point 5: A, B, B -> the trailing duplicate of B is dropped.
              citation('【11:1†source】', 1114, 1127, fileA),
              citation('【11:10†source】', 1127, 1141, fileB),
              citation('【11:7†source】', 1141, 1154, fileB),
            ],
          },
        },
      ],
      attachments: [],
      metadata: {},
      files: [],
    },
  ];

  const filenames = { [fileA]: 'hp1.txt', [fileB]: 'hp2.txt' };
  retrieveAndProcessFile.mockImplementation(({ file_id }) =>
    Promise.resolve({ filename: filenames[file_id] }),
  );

  const result = await processMessages({
    openai: {},
    client: { processedFileIds: new Set() },
    messages,
  });

  // Built with an explicit join so the blank lines between paragraphs are unambiguous.
  const expectedText = [
    'The text you have uploaded is from the book "Harry Potter and the Philosopher\'s Stone" by J.K. Rowling. It follows the story of a young boy named Harry Potter who discovers that he is a wizard on his eleventh birthday. Here are some key points of the narrative:',
    '',
    '1. **Discovery and Invitation to Hogwarts**: Harry learns that he is a wizard and receives an invitation to attend Hogwarts School of Witchcraft and Wizardry^1^^2^.',
    '',
    '2. **Shopping for Supplies**: Hagrid takes Harry to Diagon Alley to buy his school supplies, including his wand from Ollivander\'s^1^.',
    '',
    '3. **Introduction to Hogwarts**: Harry is introduced to Hogwarts, the magical school where he will learn about magic and discover more about his own background^2^.',
    '',
    '4. **Meeting Friends and Enemies**: At Hogwarts, Harry makes friends like Ron Weasley and Hermione Granger, and enemies like Draco Malfoy^1^.',
    '',
    '5. **Uncovering the Mystery**: Harry, along with Ron and Hermione, uncovers the mystery of the Philosopher\'s Stone and its connection to the dark wizard Voldemort^1^^2^.',
    '',
    'These points highlight Harry\'s initial experiences in the magical world and set the stage for his adventures at Hogwarts.',
    '',
    '^1.^ hp1.txt',
    '^2.^ hp2.txt',
  ].join('\n');

  expect(result.text).toBe(expectedText);
  expect(result.edited).toBe(true);
});
// The message already contains a literal `^1^` that is NOT covered by any annotation.
// Only the annotated `【11:2†source】` span may be rewritten; the pre-existing `^1^` must
// come through unchanged and must not be merged with, or mistaken for, a generated marker.
test('handles edge case with pre-existing citation-like text', async () => {
const messages = [
{
content: [
{
type: 'text',
text: {
value:
'This is a test ^1^ with pre-existing citation-like text. Here\'s a real citation【11:2†source】.',
annotations: [
{
type: 'file_citation',
text: '【11:2†source】',
start_index: 79,
end_index: 92,
file_citation: {
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
},
},
],
},
},
],
created_at: 1,
},
];
retrieveAndProcessFile.mockResolvedValue({ filename: 'test.txt' });
// A fresh client is passed here rather than the one prepared in `beforeEach`.
const result = await processMessages({
openai: {},
client: { processedFileIds: new Set() },
messages,
});
const expectedText =
'This is a test ^1^ with pre-existing citation-like text. Here\'s a real citation^1^.\n\n^1.^ test.txt';
expect(result.text).toBe(expectedText);
expect(result.edited).toBe(true);
});
test('handles FILE_PATH annotation type', async () => {
const messages = [
{
content: [
{
type: 'text',
text: {
value: 'Here is a file path: [file_path]',
annotations: [
{
type: 'file_path',
text: '[file_path]',
start_index: 21,
end_index: 32,
file_path: {
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
},
},
],
},
},
],
created_at: 1,
},
];
retrieveAndProcessFile.mockResolvedValue({
filename: 'test.txt',
filepath: '/path/to/test.txt',
});
const result = await processMessages({
openai: {},
client: { processedFileIds: new Set() },
messages,
});
const expectedText = 'Here is a file path: /path/to/test.txt';
expect(result.text).toBe(expectedText);
expect(result.edited).toBe(true);
});
test('handles FILE_CITATION annotation type', async () => {
const messages = [
{
content: [
{
type: 'text',
text: {
value: 'Here is a citation: [citation]',
annotations: [
{
type: 'file_citation',
text: '[citation]',
start_index: 20,
end_index: 30,
file_citation: {
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
},
},
],
},
},
],
created_at: 1,
},
];
retrieveAndProcessFile.mockResolvedValue({ filename: 'test.txt' });
const result = await processMessages({
openai: {},
client: { processedFileIds: new Set() },
messages,
});
const expectedText = 'Here is a citation: ^1^\n\n^1.^ test.txt';
expect(result.text).toBe(expectedText);
expect(result.edited).toBe(true);
});
test('handles multiple annotation types in a single message', async () => {
const messages = [
{
content: [
{
type: 'text',
text: {
value:
'File path: [file_path]. Citation: [citation1]. Another citation: [citation2].',
annotations: [
{
type: 'file_path',
text: '[file_path]',
start_index: 11,
end_index: 22,
file_path: {
file_id: 'file-XXXXXXXXXXXXXXXX1',
},
},
{
type: 'file_citation',
text: '[citation1]',
start_index: 34,
end_index: 45,
file_citation: {
file_id: 'file-XXXXXXXXXXXXXXXX2',
},
},
{
type: 'file_citation',
text: '[citation2]',
start_index: 65,
end_index: 76,
file_citation: {
file_id: 'file-XXXXXXXXXXXXXXXX3',
},
},
],
},
},
],
created_at: 1,
},
];
retrieveAndProcessFile.mockResolvedValueOnce({
filename: 'file1.txt',
filepath: '/path/to/file1.txt',
});
retrieveAndProcessFile.mockResolvedValueOnce({ filename: 'file2.txt' });
retrieveAndProcessFile.mockResolvedValueOnce({ filename: 'file3.txt' });
const result = await processMessages({
openai: {},
client: { processedFileIds: new Set() },
messages,
});
const expectedText =
'File path: /path/to/file1.txt. Citation: ^1^. Another citation: ^2^.\n\n^1.^ file2.txt\n^2.^ file3.txt';
expect(result.text).toBe(expectedText);
expect(result.edited).toBe(true);
});
test('handles annotation processing failure', async () => {
const messages = [
{
content: [
{
type: 'text',
text: {
value: 'This citation will fail: [citation]',
annotations: [
{
type: 'file_citation',
text: '[citation]',
start_index: 25,
end_index: 35,
file_citation: {
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
},
},
],
},
},
],
created_at: 1,
},
];
retrieveAndProcessFile.mockRejectedValue(new Error('File not found'));
const result = await processMessages({
openai: {},
client: { processedFileIds: new Set() },
messages,
});
const expectedText = 'This citation will fail: [citation]';
expect(result.text).toBe(expectedText);
expect(result.edited).toBe(false);
});
test('handles multiple FILE_PATH annotations with sandbox links', async () => {
const messages = [
{
id: 'msg_XXXXXXXXXXXXXXXXXXXX',
object: 'thread.message',
created_at: 1722983745,
assistant_id: 'asst_XXXXXXXXXXXXXXXXXXXX',
thread_id: 'thread_XXXXXXXXXXXXXXXXXXXX',
run_id: 'run_XXXXXXXXXXXXXXXXXXXX',
status: 'completed',
incomplete_details: null,
incomplete_at: null,
completed_at: 1722983747,
role: 'assistant',
content: [
{
type: 'text',
text: {
value:
'I have generated three dummy CSV files for you. You can download them using the links below:\n\n1. [Download Dummy Data 1](sandbox:/mnt/data/dummy_data1.csv)\n2. [Download Dummy Data 2](sandbox:/mnt/data/dummy_data2.csv)\n3. [Download Dummy Data 3](sandbox:/mnt/data/dummy_data3.csv)',
annotations: [
{
type: 'file_path',
text: 'sandbox:/mnt/data/dummy_data1.csv',
start_index: 121,
end_index: 154,
file_path: {
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
},
},
{
type: 'file_path',
text: 'sandbox:/mnt/data/dummy_data2.csv',
start_index: 183,
end_index: 216,
file_path: {
file_id: 'file-YYYYYYYYYYYYYYYYYYYY',
},
},
{
type: 'file_path',
text: 'sandbox:/mnt/data/dummy_data3.csv',
start_index: 245,
end_index: 278,
file_path: {
file_id: 'file-ZZZZZZZZZZZZZZZZZZZZ',
},
},
],
},
},
],
attachments: [
{
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
tools: [
{
type: 'code_interpreter',
},
],
},
{
file_id: 'file-YYYYYYYYYYYYYYYYYYYY',
tools: [
{
type: 'code_interpreter',
},
],
},
{
file_id: 'file-ZZZZZZZZZZZZZZZZZZZZ',
tools: [
{
type: 'code_interpreter',
},
],
},
],
metadata: {},
files: [
{
object: 'file',
id: 'file-XXXXXXXXXXXXXXXXXXXX',
purpose: 'assistants_output',
filename: 'dummy_data1.csv',
bytes: 1925,
created_at: 1722983746,
status: 'processed',
status_details: null,
type: 'text/csv',
file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
filepath:
'https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-XXXXXXXXXXXXXXXXXXXX/dummy_data1.csv',
usage: 1,
user: 'XXXXXXXXXXXXXXXXXXXX',
context: 'assistants_output',
source: 'openai',
model: 'gpt-4o-mini',
},
{
object: 'file',
id: 'file-YYYYYYYYYYYYYYYYYYYY',
purpose: 'assistants_output',
filename: 'dummy_data2.csv',
bytes: 4221,
created_at: 1722983746,
status: 'processed',
status_details: null,
type: 'text/csv',
file_id: 'file-YYYYYYYYYYYYYYYYYYYY',
filepath:
'https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-YYYYYYYYYYYYYYYYYYYY/dummy_data2.csv',
usage: 1,
user: 'XXXXXXXXXXXXXXXXXXXX',
context: 'assistants_output',
source: 'openai',
model: 'gpt-4o-mini',
},
{
object: 'file',
id: 'file-ZZZZZZZZZZZZZZZZZZZZ',
purpose: 'assistants_output',
filename: 'dummy_data3.csv',
bytes: 3534,
created_at: 1722983747,
status: 'processed',
status_details: null,
type: 'text/csv',
file_id: 'file-ZZZZZZZZZZZZZZZZZZZZ',
filepath:
'https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-ZZZZZZZZZZZZZZZZZZZZ/dummy_data3.csv',
usage: 1,
user: 'XXXXXXXXXXXXXXXXXXXX',
context: 'assistants_output',
source: 'openai',
model: 'gpt-4o-mini',
},
],
},
];
const mockClient = {
processedFileIds: new Set(),
};
// Mock the retrieveAndProcessFile function for each file
retrieveAndProcessFile.mockImplementation(({ file_id }) => {
const fileMap = {
'file-XXXXXXXXXXXXXXXXXXXX': {
filename: 'dummy_data1.csv',
filepath:
'https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-XXXXXXXXXXXXXXXXXXXX/dummy_data1.csv',
},
'file-YYYYYYYYYYYYYYYYYYYY': {
filename: 'dummy_data2.csv',
filepath:
'https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-YYYYYYYYYYYYYYYYYYYY/dummy_data2.csv',
},
'file-ZZZZZZZZZZZZZZZZZZZZ': {
filename: 'dummy_data3.csv',
filepath:
'https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-ZZZZZZZZZZZZZZZZZZZZ/dummy_data3.csv',
},
};
return Promise.resolve(fileMap[file_id]);
});
const result = await processMessages({ openai: {}, client: mockClient, messages });
const expectedText =
'I have generated three dummy CSV files for you. You can download them using the links below:\n\n1. [Download Dummy Data 1](https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-XXXXXXXXXXXXXXXXXXXX/dummy_data1.csv)\n2. [Download Dummy Data 2](https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-YYYYYYYYYYYYYYYYYYYY/dummy_data2.csv)\n3. [Download Dummy Data 3](https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-ZZZZZZZZZZZZZZZZZZZZ/dummy_data3.csv)';
expect(result.text).toBe(expectedText);
expect(result.edited).toBe(true);
});
});