📜 fix: Better OpenAI Assistants Annotation Processing (#3565)

* fix: correctly handle openai assistants annotations
* fix: Handle adjacent identical citations only for our specific format
* fix: correctly handle adjacent identical citations
* refactor: make regex handling more robust
* fix: skip annotation condition, make regex handling more robust
* refactor: Handle FILE_PATH and FILE_CITATION annotation types in processMessages.spec.js
* ci: unit tests for real file path type annotations
2026-03-20 22:56:33 +01:00 · 2024-08-06 18:52:58 -04:00 · 2024-08-06 18:52:58 -04:00 · 270c6d2350
commit 270c6d2350
parent c2a79aee1b
2 changed files with 1097 additions and 123 deletions
--- a/api/server/services/Threads/manage.js
+++ b/api/server/services/Threads/manage.js
@ -11,7 +11,6 @@ const { recordMessage, getMessages } = require('~/models/Message');
 const { saveConvo } = require('~/models/Conversation');
 const spendTokens = require('~/models/spendTokens');
 const { countTokens } = require('~/server/utils');
-const { logger } = require('~/config');

 /**
 * Initializes a new thread or adds messages to an existing thread.
@ -516,80 +515,34 @@ const recordUsage = async ({
  );
 };

-/**
- * Creates a replaceAnnotation function with internal state for tracking the index offset.
- *
- * @returns {function} The replaceAnnotation function with closure for index offset.
+const uniqueCitationStart = '^====||===';
+const uniqueCitationEnd = '==|||||^';
+
+/** Escapes regex metacharacters so the string can be matched literally inside a RegExp.
+ * @param {string} string - The string to escape.
+ * @returns {string} The escaped string.
 */
-function createReplaceAnnotation() {
-  let indexOffset = 0;
-
-  /**
-   * Safely replaces the annotated text within the specified range denoted by start_index and end_index,
-   * after verifying that the text within that range matches the given annotation text.
-   * Proceeds with the replacement even if a mismatch is found, but logs a warning.
-   *
-   * @param {object} params The original text content.
-   * @param {string} params.currentText The current text content, with/without replacements.
-   * @param {number} params.start_index The starting index where replacement should begin.
-   * @param {number} params.end_index The ending index where replacement should end.
-   * @param {string} params.expectedText The text expected to be found in the specified range.
-   * @param {string} params.replacementText The text to insert in place of the existing content.
-   * @returns {string} The text with the replacement applied, regardless of text match.
-   */
-  function replaceAnnotation({
-    currentText,
-    start_index,
-    end_index,
-    expectedText,
-    replacementText,
-  }) {
-    const adjustedStartIndex = start_index + indexOffset;
-    const adjustedEndIndex = end_index + indexOffset;
-
-    if (
-      adjustedStartIndex < 0 ||
-      adjustedEndIndex > currentText.length ||
-      adjustedStartIndex > adjustedEndIndex
-    ) {
-      logger.warn(`Invalid range specified for annotation replacement.
-      Attempting replacement with \`replace\` method instead...
-      length: ${currentText.length}
-      start_index: ${adjustedStartIndex}
-      end_index: ${adjustedEndIndex}`);
-      return currentText.replace(expectedText, replacementText);
-    }
-
-    if (currentText.substring(adjustedStartIndex, adjustedEndIndex) !== expectedText) {
-      return currentText.replace(expectedText, replacementText);
-    }
-
-    indexOffset += replacementText.length - (adjustedEndIndex - adjustedStartIndex);
-    return (
-      currentText.slice(0, adjustedStartIndex) +
-      replacementText +
-      currentText.slice(adjustedEndIndex)
-    );
-  }
-
-  return replaceAnnotation;
+function escapeRegExp(string) {
+  return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
 }

 /**
 * Sorts, processes, and flattens messages to a single string.
 *
- * @param {object} params - The OpenAI client instance.
+ * @param {object} params - The parameters for processing messages.
 * @param {OpenAIClient} params.openai - The OpenAI client instance.
 * @param {RunClient} params.client - The LibreChat client that manages the run: either refers to `OpenAI` or `StreamRunManager`.
 * @param {ThreadMessage[]} params.messages - An array of messages.
- * @returns {Promise<{messages: ThreadMessage[], text: string}>} The sorted messages and the flattened text.
+ * @returns {Promise<{messages: ThreadMessage[], text: string, edited: boolean}>} The sorted messages, the flattened text, and whether it was edited.
 */
 async function processMessages({ openai, client, messages = [] }) {
  const sorted = messages.sort((a, b) => a.created_at - b.created_at);

  let text = '';
  let edited = false;
-  const sources = [];
+  const sources = new Map();
+  const fileRetrievalPromises = [];
+
  for (const message of sorted) {
    message.files = [];
    for (const content of message.content) {
@ -598,15 +551,21 @@ async function processMessages({ openai, client, messages = [] }) {
      const currentFileId = contentType?.file_id;

      if (type === ContentTypes.IMAGE_FILE && !client.processedFileIds.has(currentFileId)) {
-        const file = await retrieveAndProcessFile({
-          openai,
-          client,
-          file_id: currentFileId,
-          basename: `${currentFileId}.png`,
-        });
-
-        client.processedFileIds.add(currentFileId);
-        message.files.push(file);
+        fileRetrievalPromises.push(
+          retrieveAndProcessFile({
+            openai,
+            client,
+            file_id: currentFileId,
+            basename: `${currentFileId}.png`,
+          })
+            .then((file) => {
+              client.processedFileIds.add(currentFileId);
+              message.files.push(file);
+            })
+            .catch((error) => {
+              console.error(`Failed to retrieve file: ${error.message}`);
+            }),
+        );
        continue;
      }

@ -615,78 +574,110 @@ async function processMessages({ openai, client, messages = [] }) {
      /** @type {{ annotations: Annotation[] }} */
      const { annotations } = contentType ?? {};

-      // Process annotations if they exist
      if (!annotations?.length) {
-        text += currentText + ' ';
+        text += currentText;
        continue;
      }

-      const originalText = currentText;
-      text += originalText;
-
-      const replaceAnnotation = createReplaceAnnotation();
-
-      logger.debug('[processMessages] Processing annotations:', annotations);
-      for (const annotation of annotations) {
-        let file;
+      const replacements = [];
+      const annotationPromises = annotations.map(async (annotation) => {
        const type = annotation.type;
        const annotationType = annotation[type];
        const file_id = annotationType?.file_id;
        const alreadyProcessed = client.processedFileIds.has(file_id);

-        const replaceCurrentAnnotation = (replacementText = '') => {
-          const { start_index, end_index, text: expectedText } = annotation;
-          currentText = replaceAnnotation({
-            originalText,
-            currentText,
-            start_index,
-            end_index,
-            expectedText,
-            replacementText,
-          });
-          edited = true;
-        };
+        let file;
+        let replacementText = '';

-        if (alreadyProcessed) {
-          const { file_id } = annotationType || {};
-          file = await retrieveAndProcessFile({ openai, client, file_id, unknownType: true });
-        } else if (type === AnnotationTypes.FILE_PATH) {
-          const basename = path.basename(annotation.text);
-          file = await retrieveAndProcessFile({
-            openai,
-            client,
-            file_id,
-            basename,
-          });
-          replaceCurrentAnnotation(file.filepath);
-        } else if (type === AnnotationTypes.FILE_CITATION) {
-          file = await retrieveAndProcessFile({
-            openai,
-            client,
-            file_id,
-            unknownType: true,
-          });
-          sources.push(file.filename);
-          replaceCurrentAnnotation(`^${sources.length}^`);
+        try {
+          if (alreadyProcessed) {
+            file = await retrieveAndProcessFile({ openai, client, file_id, unknownType: true });
+          } else if (type === AnnotationTypes.FILE_PATH) {
+            const basename = path.basename(annotation.text);
+            file = await retrieveAndProcessFile({
+              openai,
+              client,
+              file_id,
+              basename,
+            });
+            replacementText = file.filepath;
+          } else if (type === AnnotationTypes.FILE_CITATION && file_id) {
+            file = await retrieveAndProcessFile({
+              openai,
+              client,
+              file_id,
+              unknownType: true,
+            });
+            if (file && file.filename) {
+              if (!sources.has(file.filename)) {
+                sources.set(file.filename, sources.size + 1);
+              }
+              replacementText = `${uniqueCitationStart}${sources.get(
+                file.filename,
+              )}${uniqueCitationEnd}`;
+            }
+          }
+
+          if (file && replacementText) {
+            replacements.push({
+              start: annotation.start_index,
+              end: annotation.end_index,
+              text: replacementText,
+            });
+            edited = true;
+            if (!alreadyProcessed) {
+              client.processedFileIds.add(file_id);
+              message.files.push(file);
+            }
+          }
+        } catch (error) {
+          console.error(`Failed to process annotation: ${error.message}`);
        }
+      });

-        text = currentText;
+      await Promise.all(annotationPromises);

-        if (!file) {
-          continue;
-        }
-
-        client.processedFileIds.add(file_id);
-        message.files.push(file);
+      // Apply replacements in reverse order
+      replacements.sort((a, b) => b.start - a.start);
+      for (const { start, end, text: replacementText } of replacements) {
+        currentText = currentText.slice(0, start) + replacementText + currentText.slice(end);
      }
+
+      text += currentText;
    }
  }

-  if (sources.length) {
+  await Promise.all(fileRetrievalPromises);
+
+  // Handle adjacent identical citations with the unique format
+  const adjacentCitationRegex = new RegExp(
+    `${escapeRegExp(uniqueCitationStart)}(\\d+)${escapeRegExp(
+      uniqueCitationEnd,
+    )}(\\s*)${escapeRegExp(uniqueCitationStart)}(\\d+)${escapeRegExp(uniqueCitationEnd)}`,
+    'g',
+  );
+  text = text.replace(adjacentCitationRegex, (match, num1, space, num2) => {
+    return num1 === num2
+      ? `${uniqueCitationStart}${num1}${uniqueCitationEnd}`
+      : `${uniqueCitationStart}${num1}${uniqueCitationEnd}${space}${uniqueCitationStart}${num2}${uniqueCitationEnd}`;
+  });
+
+  // Remove any remaining adjacent identical citations
+  const remainingAdjacentRegex = new RegExp(
+    `(${escapeRegExp(uniqueCitationStart)}(\\d+)${escapeRegExp(uniqueCitationEnd)})\\s*\\1+`,
+    'g',
+  );
+  text = text.replace(remainingAdjacentRegex, '$1');
+
+  // Replace the unique citation format with the final format
+  text = text.replace(new RegExp(escapeRegExp(uniqueCitationStart), 'g'), '^');
+  text = text.replace(new RegExp(escapeRegExp(uniqueCitationEnd), 'g'), '^');
+
+  if (sources.size) {
    text += '\n\n';
-    for (let i = 0; i < sources.length; i++) {
-      text += `^${i + 1}.^ ${sources[i]}${i === sources.length - 1 ? '' : '\n'}`;
-    }
+    Array.from(sources.entries()).forEach(([source, index], arrayIndex) => {
+      text += `^${index}.^ ${source}${arrayIndex === sources.size - 1 ? '' : '\n'}`;
+    });
  }

  return { messages: sorted, text, edited };
--- a/api/server/services/Threads/processMessages.spec.js
+++ b/api/server/services/Threads/processMessages.spec.js
@ -0,0 +1,983 @@
+const { retrieveAndProcessFile } = require('~/server/services/Files/process');
+const { processMessages } = require('./manage');
+
+jest.mock('~/server/services/Files/process', () => ({
+  retrieveAndProcessFile: jest.fn(),
+}));
+
+describe('processMessages', () => {
+  let openai, client;
+
+  beforeEach(() => {
+    openai = {};
+    client = {
+      processedFileIds: new Set(),
+    };
+    jest.clearAllMocks();
+    retrieveAndProcessFile.mockReset();
+  });
+
+  test('handles normal case with single source', async () => {
+    const messages = [
+      {
+        content: [
+          {
+            type: 'text',
+            text: {
+              value: 'This is a test ^1^ and another^1^',
+              annotations: [
+                {
+                  type: 'file_citation',
+                  start_index: 15,
+                  end_index: 18,
+                  file_citation: { file_id: 'file1' },
+                },
+                {
+                  type: 'file_citation',
+                  start_index: 30,
+                  end_index: 33,
+                  file_citation: { file_id: 'file1' },
+                },
+              ],
+            },
+          },
+        ],
+        created_at: 1,
+      },
+    ];
+
+    retrieveAndProcessFile.mockResolvedValue({ filename: 'test.txt' });
+
+    const result = await processMessages({ openai, client, messages });
+
+    expect(result.text).toBe('This is a test ^1^ and another^1^\n\n^1.^ test.txt');
+    expect(result.edited).toBe(true);
+  });
+
+  test('handles multiple different sources', async () => {
+    const messages = [
+      {
+        content: [
+          {
+            type: 'text',
+            text: {
+              value: 'This is a test ^1^ and another^2^',
+              annotations: [
+                {
+                  type: 'file_citation',
+                  start_index: 15,
+                  end_index: 18,
+                  file_citation: { file_id: 'file1' },
+                },
+                {
+                  type: 'file_citation',
+                  start_index: 30,
+                  end_index: 33,
+                  file_citation: { file_id: 'file2' },
+                },
+              ],
+            },
+          },
+        ],
+        created_at: 1,
+      },
+    ];
+
+    retrieveAndProcessFile
+      .mockResolvedValueOnce({ filename: 'test1.txt' })
+      .mockResolvedValueOnce({ filename: 'test2.txt' });
+
+    const result = await processMessages({ openai, client, messages });
+
+    expect(result.text).toBe('This is a test ^1^ and another^2^\n\n^1.^ test1.txt\n^2.^ test2.txt');
+    expect(result.edited).toBe(true);
+  });
+
+  test('handles file retrieval failure', async () => {
+    const messages = [
+      {
+        content: [
+          {
+            type: 'text',
+            text: {
+              value: 'This is a test ^1^',
+              annotations: [
+                {
+                  type: 'file_citation',
+                  start_index: 15,
+                  end_index: 18,
+                  file_citation: { file_id: 'file1' },
+                },
+              ],
+            },
+          },
+        ],
+        created_at: 1,
+      },
+    ];
+
+    retrieveAndProcessFile.mockRejectedValue(new Error('File not found'));
+
+    const result = await processMessages({ openai, client, messages });
+
+    expect(result.text).toBe('This is a test ^1^');
+    expect(result.edited).toBe(false);
+  });
+
+  test('handles citations without file ids', async () => {
+    const messages = [
+      {
+        content: [
+          {
+            type: 'text',
+            text: {
+              value: 'This is a test ^1^',
+              annotations: [{ type: 'file_citation', start_index: 15, end_index: 18 }],
+            },
+          },
+        ],
+        created_at: 1,
+      },
+    ];
+
+    const result = await processMessages({ openai, client, messages });
+
+    expect(result.text).toBe('This is a test ^1^');
+    expect(result.edited).toBe(false);
+  });
+
+  test('handles mixed valid and invalid citations', async () => {
+    const messages = [
+      {
+        content: [
+          {
+            type: 'text',
+            text: {
+              value: 'This is a test ^1^ and ^2^ and ^3^',
+              annotations: [
+                {
+                  type: 'file_citation',
+                  start_index: 15,
+                  end_index: 18,
+                  file_citation: { file_id: 'file1' },
+                },
+                { type: 'file_citation', start_index: 23, end_index: 26 },
+                {
+                  type: 'file_citation',
+                  start_index: 31,
+                  end_index: 34,
+                  file_citation: { file_id: 'file3' },
+                },
+              ],
+            },
+          },
+        ],
+        created_at: 1,
+      },
+    ];
+
+    retrieveAndProcessFile
+      .mockResolvedValueOnce({ filename: 'test1.txt' })
+      .mockResolvedValueOnce({ filename: 'test3.txt' });
+
+    const result = await processMessages({ openai, client, messages });
+
+    expect(result.text).toBe(
+      'This is a test ^1^ and ^2^ and ^2^\n\n^1.^ test1.txt\n^2.^ test3.txt',
+    );
+    expect(result.edited).toBe(true);
+  });
+
+  test('handles adjacent identical citations', async () => {
+    const messages = [
+      {
+        content: [
+          {
+            type: 'text',
+            text: {
+              value: 'This is a test ^1^^1^ and ^1^ ^1^',
+              annotations: [
+                {
+                  type: 'file_citation',
+                  start_index: 15,
+                  end_index: 18,
+                  file_citation: { file_id: 'file1' },
+                },
+                {
+                  type: 'file_citation',
+                  start_index: 18,
+                  end_index: 21,
+                  file_citation: { file_id: 'file1' },
+                },
+                {
+                  type: 'file_citation',
+                  start_index: 26,
+                  end_index: 29,
+                  file_citation: { file_id: 'file1' },
+                },
+                {
+                  type: 'file_citation',
+                  start_index: 30,
+                  end_index: 33,
+                  file_citation: { file_id: 'file1' },
+                },
+              ],
+            },
+          },
+        ],
+        created_at: 1,
+      },
+    ];
+
+    retrieveAndProcessFile.mockResolvedValue({ filename: 'test.txt' });
+
+    const result = await processMessages({ openai, client, messages });
+
+    expect(result.text).toBe('This is a test ^1^ and ^1^\n\n^1.^ test.txt');
+    expect(result.edited).toBe(true);
+  });
+  test('handles real data with multiple adjacent citations', async () => {
+    const messages = [
+      {
+        id: 'msg_XXXXXXXXXXXXXXXXXXXX',
+        object: 'thread.message',
+        created_at: 1722980324,
+        assistant_id: 'asst_XXXXXXXXXXXXXXXXXXXX',
+        thread_id: 'thread_XXXXXXXXXXXXXXXXXXXX',
+        run_id: 'run_XXXXXXXXXXXXXXXXXXXX',
+        status: 'completed',
+        incomplete_details: null,
+        incomplete_at: null,
+        completed_at: 1722980331,
+        role: 'assistant',
+        content: [
+          {
+            type: 'text',
+            text: {
+              value:
+                'The text you have uploaded is from the book "Harry Potter and the Philosopher\'s Stone" by J.K. Rowling. It follows the story of a young boy named Harry Potter who discovers that he is a wizard on his eleventh birthday. Here are some key points of the narrative:\n\n1. **Discovery and Invitation to Hogwarts**: Harry learns that he is a wizard and receives an invitation to attend Hogwarts School of Witchcraft and Wizardry【11:2†source】【11:4†source】.\n\n2. **Shopping for Supplies**: Hagrid takes Harry to Diagon Alley to buy his school supplies, including his wand from Ollivander\'s【11:9†source】【11:14†source】.\n\n3. **Introduction to Hogwarts**: Harry is introduced to Hogwarts, the magical school where he will learn about magic and discover more about his own background【11:12†source】【11:18†source】.\n\n4. **Meeting Friends and Enemies**: At Hogwarts, Harry makes friends like Ron Weasley and Hermione Granger, and enemies like Draco Malfoy【11:16†source】.\n\n5. **Uncovering the Mystery**: Harry, along with Ron and Hermione, uncovers the mystery of the Philosopher\'s Stone and its connection to the dark wizard Voldemort【11:1†source】【11:10†source】【11:7†source】.\n\nThese points highlight Harry\'s initial experiences in the magical world and set the stage for his adventures at Hogwarts.',
+              annotations: [
+                {
+                  type: 'file_citation',
+                  text: '【11:2†source】',
+                  start_index: 420,
+                  end_index: 433,
+                  file_citation: {
+                    file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+                  },
+                },
+                {
+                  type: 'file_citation',
+                  text: '【11:4†source】',
+                  start_index: 433,
+                  end_index: 446,
+                  file_citation: {
+                    file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+                  },
+                },
+                {
+                  type: 'file_citation',
+                  text: '【11:9†source】',
+                  start_index: 578,
+                  end_index: 591,
+                  file_citation: {
+                    file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+                  },
+                },
+                {
+                  type: 'file_citation',
+                  text: '【11:14†source】',
+                  start_index: 591,
+                  end_index: 605,
+                  file_citation: {
+                    file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+                  },
+                },
+                {
+                  type: 'file_citation',
+                  text: '【11:12†source】',
+                  start_index: 767,
+                  end_index: 781,
+                  file_citation: {
+                    file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+                  },
+                },
+                {
+                  type: 'file_citation',
+                  text: '【11:18†source】',
+                  start_index: 781,
+                  end_index: 795,
+                  file_citation: {
+                    file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+                  },
+                },
+                {
+                  type: 'file_citation',
+                  text: '【11:16†source】',
+                  start_index: 935,
+                  end_index: 949,
+                  file_citation: {
+                    file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+                  },
+                },
+                {
+                  type: 'file_citation',
+                  text: '【11:1†source】',
+                  start_index: 1114,
+                  end_index: 1127,
+                  file_citation: {
+                    file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+                  },
+                },
+                {
+                  type: 'file_citation',
+                  text: '【11:10†source】',
+                  start_index: 1127,
+                  end_index: 1141,
+                  file_citation: {
+                    file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+                  },
+                },
+                {
+                  type: 'file_citation',
+                  text: '【11:7†source】',
+                  start_index: 1141,
+                  end_index: 1154,
+                  file_citation: {
+                    file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+                  },
+                },
+              ],
+            },
+          },
+        ],
+        attachments: [],
+        metadata: {},
+        files: [
+          {
+            object: 'file',
+            id: 'file-XXXXXXXXXXXXXXXXXXXX',
+            purpose: 'assistants',
+            filename: 'hp1.txt',
+            bytes: 439742,
+            created_at: 1722962139,
+            status: 'processed',
+            status_details: null,
+            type: 'text/plain',
+            file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+            filepath:
+              'https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-XXXXXXXXXXXXXXXXXXXX/hp1.txt',
+            usage: 1,
+            user: 'XXXXXXXXXXXXXXXXXXXX',
+            context: 'assistants',
+            source: 'openai',
+            model: 'gpt-4o',
+          },
+        ],
+      },
+    ];
+
+    retrieveAndProcessFile.mockResolvedValue({ filename: 'hp1.txt' });
+
+    const result = await processMessages({
+      openai: {},
+      client: { processedFileIds: new Set() },
+      messages,
+    });
+
+    const expectedText = `The text you have uploaded is from the book "Harry Potter and the Philosopher's Stone" by J.K. Rowling. It follows the story of a young boy named Harry Potter who discovers that he is a wizard on his eleventh birthday. Here are some key points of the narrative:
+
+1. **Discovery and Invitation to Hogwarts**: Harry learns that he is a wizard and receives an invitation to attend Hogwarts School of Witchcraft and Wizardry^1^.
+
+2. **Shopping for Supplies**: Hagrid takes Harry to Diagon Alley to buy his school supplies, including his wand from Ollivander's^1^.
+
+3. **Introduction to Hogwarts**: Harry is introduced to Hogwarts, the magical school where he will learn about magic and discover more about his own background^1^.
+
+4. **Meeting Friends and Enemies**: At Hogwarts, Harry makes friends like Ron Weasley and Hermione Granger, and enemies like Draco Malfoy^1^.
+
+5. **Uncovering the Mystery**: Harry, along with Ron and Hermione, uncovers the mystery of the Philosopher's Stone and its connection to the dark wizard Voldemort^1^.
+
+These points highlight Harry's initial experiences in the magical world and set the stage for his adventures at Hogwarts.
+
+^1.^ hp1.txt`;
+
+    expect(result.text).toBe(expectedText);
+    expect(result.edited).toBe(true);
+  });
+
+  test('handles real data with multiple adjacent citations with multiple sources', async () => {
+    const messages = [
+      {
+        id: 'msg_XXXXXXXXXXXXXXXXXXXX',
+        object: 'thread.message',
+        created_at: 1722980324,
+        assistant_id: 'asst_XXXXXXXXXXXXXXXXXXXX',
+        thread_id: 'thread_XXXXXXXXXXXXXXXXXXXX',
+        run_id: 'run_XXXXXXXXXXXXXXXXXXXX',
+        status: 'completed',
+        incomplete_details: null,
+        incomplete_at: null,
+        completed_at: 1722980331,
+        role: 'assistant',
+        content: [
+          {
+            type: 'text',
+            text: {
+              value:
+                'The text you have uploaded is from the book "Harry Potter and the Philosopher\'s Stone" by J.K. Rowling. It follows the story of a young boy named Harry Potter who discovers that he is a wizard on his eleventh birthday. Here are some key points of the narrative:\n\n1. **Discovery and Invitation to Hogwarts**: Harry learns that he is a wizard and receives an invitation to attend Hogwarts School of Witchcraft and Wizardry【11:2†source】【11:4†source】.\n\n2. **Shopping for Supplies**: Hagrid takes Harry to Diagon Alley to buy his school supplies, including his wand from Ollivander\'s【11:9†source】【11:14†source】.\n\n3. **Introduction to Hogwarts**: Harry is introduced to Hogwarts, the magical school where he will learn about magic and discover more about his own background【11:12†source】【11:18†source】.\n\n4. **Meeting Friends and Enemies**: At Hogwarts, Harry makes friends like Ron Weasley and Hermione Granger, and enemies like Draco Malfoy【11:16†source】.\n\n5. **Uncovering the Mystery**: Harry, along with Ron and Hermione, uncovers the mystery of the Philosopher\'s Stone and its connection to the dark wizard Voldemort【11:1†source】【11:10†source】【11:7†source】.\n\nThese points highlight Harry\'s initial experiences in the magical world and set the stage for his adventures at Hogwarts.',
+              annotations: [
+                {
+                  type: 'file_citation',
+                  text: '【11:2†source】',
+                  start_index: 420,
+                  end_index: 433,
+                  file_citation: {
+                    file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+                  },
+                },
+                {
+                  type: 'file_citation',
+                  text: '【11:4†source】',
+                  start_index: 433,
+                  end_index: 446,
+                  file_citation: {
+                    file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+                  },
+                },
+                {
+                  type: 'file_citation',
+                  text: '【11:9†source】',
+                  start_index: 578,
+                  end_index: 591,
+                  file_citation: {
+                    file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+                  },
+                },
+                {
+                  type: 'file_citation',
+                  text: '【11:14†source】',
+                  start_index: 591,
+                  end_index: 605,
+                  file_citation: {
+                    file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+                  },
+                },
+                {
+                  type: 'file_citation',
+                  text: '【11:12†source】',
+                  start_index: 767,
+                  end_index: 781,
+                  file_citation: {
+                    file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+                  },
+                },
+                {
+                  type: 'file_citation',
+                  text: '【11:18†source】',
+                  start_index: 781,
+                  end_index: 795,
+                  file_citation: {
+                    file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+                  },
+                },
+                {
+                  type: 'file_citation',
+                  text: '【11:16†source】',
+                  start_index: 935,
+                  end_index: 949,
+                  file_citation: {
+                    file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+                  },
+                },
+                {
+                  type: 'file_citation',
+                  text: '【11:1†source】',
+                  start_index: 1114,
+                  end_index: 1127,
+                  file_citation: {
+                    file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+                  },
+                },
+                {
+                  type: 'file_citation',
+                  text: '【11:10†source】',
+                  start_index: 1127,
+                  end_index: 1141,
+                  file_citation: {
+                    file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+                  },
+                },
+                {
+                  type: 'file_citation',
+                  text: '【11:7†source】',
+                  start_index: 1141,
+                  end_index: 1154,
+                  file_citation: {
+                    file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+                  },
+                },
+              ],
+            },
+          },
+        ],
+        attachments: [],
+        metadata: {},
+        files: [
+          {
+            object: 'file',
+            id: 'file-XXXXXXXXXXXXXXXXXXXX',
+            purpose: 'assistants',
+            filename: 'hp1.txt',
+            bytes: 439742,
+            created_at: 1722962139,
+            status: 'processed',
+            status_details: null,
+            type: 'text/plain',
+            file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+            filepath:
+              'https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-XXXXXXXXXXXXXXXXXXXX/hp1.txt',
+            usage: 1,
+            user: 'XXXXXXXXXXXXXXXXXXXX',
+            context: 'assistants',
+            source: 'openai',
+            model: 'gpt-4o',
+          },
+        ],
+      },
+    ];
+
+    retrieveAndProcessFile.mockResolvedValue({ filename: 'hp1.txt' });
+
+    const result = await processMessages({
+      openai: {},
+      client: { processedFileIds: new Set() },
+      messages,
+    });
+
+    const expectedText = `The text you have uploaded is from the book "Harry Potter and the Philosopher's Stone" by J.K. Rowling. It follows the story of a young boy named Harry Potter who discovers that he is a wizard on his eleventh birthday. Here are some key points of the narrative:
+
+1. **Discovery and Invitation to Hogwarts**: Harry learns that he is a wizard and receives an invitation to attend Hogwarts School of Witchcraft and Wizardry^1^.
+
+2. **Shopping for Supplies**: Hagrid takes Harry to Diagon Alley to buy his school supplies, including his wand from Ollivander's^1^.
+
+3. **Introduction to Hogwarts**: Harry is introduced to Hogwarts, the magical school where he will learn about magic and discover more about his own background^1^.
+
+4. **Meeting Friends and Enemies**: At Hogwarts, Harry makes friends like Ron Weasley and Hermione Granger, and enemies like Draco Malfoy^1^.
+
+5. **Uncovering the Mystery**: Harry, along with Ron and Hermione, uncovers the mystery of the Philosopher's Stone and its connection to the dark wizard Voldemort^1^.
+
+These points highlight Harry's initial experiences in the magical world and set the stage for his adventures at Hogwarts.
+
+^1.^ hp1.txt`;
+
+    expect(result.text).toBe(expectedText);
+    expect(result.edited).toBe(true);
+  });
+
+  test('handles edge case with pre-existing citation-like text', async () => { // the input already contains a literal "^1^" that is plain text, not an annotation
+    const messages = [
+      {
+        content: [
+          {
+            type: 'text',
+            text: {
+              value:
+                'This is a test ^1^ with pre-existing citation-like text. Here\'s a real citation【11:2†source】.',
+              annotations: [
+                {
+                  type: 'file_citation',
+                  text: '【11:2†source】',
+                  start_index: 79, // offset of the opening bracket of the marker within `value`
+                  end_index: 92, // start_index + 13, the length of the marker text
+                  file_citation: {
+                    file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+                  },
+                },
+              ],
+            },
+          },
+        ],
+        created_at: 1,
+      },
+    ];
+
+    retrieveAndProcessFile.mockResolvedValue({ filename: 'test.txt' }); // stubbed file lookup; supplies the filename expected in the footnote
+
+    const result = await processMessages({
+      openai: {},
+      client: { processedFileIds: new Set() },
+      messages,
+    });
+
+    const expectedText = // the pre-existing "^1^" stays as-is; the annotated marker becomes ^1^ and a "^1.^ test.txt" footnote is appended
+      'This is a test ^1^ with pre-existing citation-like text. Here\'s a real citation^1^.\n\n^1.^ test.txt';
+
+    expect(result.text).toBe(expectedText);
+    expect(result.edited).toBe(true);
+  });
+
+  test('handles FILE_PATH annotation type', async () => { // a single file_path annotation; the expected output inlines the resolved filepath and has no footnote
+    const messages = [
+      {
+        content: [
+          {
+            type: 'text',
+            text: {
+              value: 'Here is a file path: [file_path]',
+              annotations: [
+                {
+                  type: 'file_path',
+                  text: '[file_path]',
+                  start_index: 21, // offset of '[file_path]' within `value`
+                  end_index: 32, // start_index + 11, the placeholder length
+                  file_path: {
+                    file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+                  },
+                },
+              ],
+            },
+          },
+        ],
+        created_at: 1,
+      },
+    ];
+
+    retrieveAndProcessFile.mockResolvedValue({ // stubbed file lookup returns both a filename and a filepath
+      filename: 'test.txt',
+      filepath: '/path/to/test.txt',
+    });
+
+    const result = await processMessages({
+      openai: {},
+      client: { processedFileIds: new Set() },
+      messages,
+    });
+
+    const expectedText = 'Here is a file path: /path/to/test.txt'; // placeholder replaced by the stubbed filepath
+
+    expect(result.text).toBe(expectedText);
+    expect(result.edited).toBe(true);
+  });
+
+  test('handles FILE_CITATION annotation type', async () => { // a single file_citation annotation; the expected output uses a ^1^ marker plus a footnote naming the file
+    const messages = [
+      {
+        content: [
+          {
+            type: 'text',
+            text: {
+              value: 'Here is a citation: [citation]',
+              annotations: [
+                {
+                  type: 'file_citation',
+                  text: '[citation]',
+                  start_index: 20, // offset of '[citation]' within `value`
+                  end_index: 30, // start_index + 10, the placeholder length
+                  file_citation: {
+                    file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+                  },
+                },
+              ],
+            },
+          },
+        ],
+        created_at: 1,
+      },
+    ];
+
+    retrieveAndProcessFile.mockResolvedValue({ filename: 'test.txt' }); // stubbed file lookup; only a filename is provided
+
+    const result = await processMessages({
+      openai: {},
+      client: { processedFileIds: new Set() },
+      messages,
+    });
+
+    const expectedText = 'Here is a citation: ^1^\n\n^1.^ test.txt'; // placeholder becomes ^1^; footnote lists the stubbed filename
+
+    expect(result.text).toBe(expectedText);
+    expect(result.edited).toBe(true);
+  });
+
+  test('handles multiple annotation types in a single message', async () => { // one file_path and two file_citation annotations, each with a distinct file_id
+    const messages = [
+      {
+        content: [
+          {
+            type: 'text',
+            text: {
+              value:
+                'File path: [file_path]. Citation: [citation1]. Another citation: [citation2].',
+              annotations: [
+                {
+                  type: 'file_path',
+                  text: '[file_path]',
+                  start_index: 11,
+                  end_index: 22,
+                  file_path: {
+                    file_id: 'file-XXXXXXXXXXXXXXXX1',
+                  },
+                },
+                {
+                  type: 'file_citation',
+                  text: '[citation1]',
+                  start_index: 34,
+                  end_index: 45,
+                  file_citation: {
+                    file_id: 'file-XXXXXXXXXXXXXXXX2',
+                  },
+                },
+                {
+                  type: 'file_citation',
+                  text: '[citation2]',
+                  start_index: 65,
+                  end_index: 76,
+                  file_citation: {
+                    file_id: 'file-XXXXXXXXXXXXXXXX3',
+                  },
+                },
+              ],
+            },
+          },
+        ],
+        created_at: 1,
+      },
+    ];
+
+    retrieveAndProcessFile.mockResolvedValueOnce({ // one-shot responses are consumed in call order; the expected text pairs them with the annotations as listed above
+      filename: 'file1.txt',
+      filepath: '/path/to/file1.txt',
+    });
+    retrieveAndProcessFile.mockResolvedValueOnce({ filename: 'file2.txt' });
+    retrieveAndProcessFile.mockResolvedValueOnce({ filename: 'file3.txt' });
+
+    const result = await processMessages({
+      openai: {},
+      client: { processedFileIds: new Set() },
+      messages,
+    });
+
+    const expectedText = // the file_path is inlined; only the two citations are numbered (^1^, ^2^) and footnoted
+      'File path: /path/to/file1.txt. Citation: ^1^. Another citation: ^2^.\n\n^1.^ file2.txt\n^2.^ file3.txt';
+
+    expect(result.text).toBe(expectedText);
+    expect(result.edited).toBe(true);
+  });
+
+  test('handles annotation processing failure', async () => { // the file lookup rejects; the expectation is unchanged text and edited === false
+    const messages = [
+      {
+        content: [
+          {
+            type: 'text',
+            text: {
+              value: 'This citation will fail: [citation]',
+              annotations: [
+                {
+                  type: 'file_citation',
+                  text: '[citation]',
+                  start_index: 25, // offset of '[citation]' within `value`
+                  end_index: 35, // start_index + 10, the placeholder length
+                  file_citation: {
+                    file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+                  },
+                },
+              ],
+            },
+          },
+        ],
+        created_at: 1,
+      },
+    ];
+
+    retrieveAndProcessFile.mockRejectedValue(new Error('File not found')); // every stubbed lookup rejects
+
+    const result = await processMessages({
+      openai: {},
+      client: { processedFileIds: new Set() },
+      messages,
+    });
+
+    const expectedText = 'This citation will fail: [citation]'; // identical to the input value: the placeholder is left in place
+
+    expect(result.text).toBe(expectedText);
+    expect(result.edited).toBe(false); // no replacement was made
+  });
+
+  test('handles multiple FILE_PATH annotations with sandbox links', async () => { // fixture carries object: 'thread.message' plus attachments/files metadata; three sandbox: links are each annotated as file_path
+    const messages = [
+      {
+        id: 'msg_XXXXXXXXXXXXXXXXXXXX',
+        object: 'thread.message',
+        created_at: 1722983745,
+        assistant_id: 'asst_XXXXXXXXXXXXXXXXXXXX',
+        thread_id: 'thread_XXXXXXXXXXXXXXXXXXXX',
+        run_id: 'run_XXXXXXXXXXXXXXXXXXXX',
+        status: 'completed',
+        incomplete_details: null,
+        incomplete_at: null,
+        completed_at: 1722983747,
+        role: 'assistant',
+        content: [
+          {
+            type: 'text',
+            text: {
+              value:
+                'I have generated three dummy CSV files for you. You can download them using the links below:\n\n1. [Download Dummy Data 1](sandbox:/mnt/data/dummy_data1.csv)\n2. [Download Dummy Data 2](sandbox:/mnt/data/dummy_data2.csv)\n3. [Download Dummy Data 3](sandbox:/mnt/data/dummy_data3.csv)',
+              annotations: [
+                {
+                  type: 'file_path',
+                  text: 'sandbox:/mnt/data/dummy_data1.csv',
+                  start_index: 121, // offsets cover only the sandbox: URL inside the markdown link's parentheses
+                  end_index: 154,
+                  file_path: {
+                    file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+                  },
+                },
+                {
+                  type: 'file_path',
+                  text: 'sandbox:/mnt/data/dummy_data2.csv',
+                  start_index: 183,
+                  end_index: 216,
+                  file_path: {
+                    file_id: 'file-YYYYYYYYYYYYYYYYYYYY',
+                  },
+                },
+                {
+                  type: 'file_path',
+                  text: 'sandbox:/mnt/data/dummy_data3.csv',
+                  start_index: 245,
+                  end_index: 278,
+                  file_path: {
+                    file_id: 'file-ZZZZZZZZZZZZZZZZZZZZ',
+                  },
+                },
+              ],
+            },
+          },
+        ],
+        attachments: [
+          {
+            file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+            tools: [
+              {
+                type: 'code_interpreter',
+              },
+            ],
+          },
+          {
+            file_id: 'file-YYYYYYYYYYYYYYYYYYYY',
+            tools: [
+              {
+                type: 'code_interpreter',
+              },
+            ],
+          },
+          {
+            file_id: 'file-ZZZZZZZZZZZZZZZZZZZZ',
+            tools: [
+              {
+                type: 'code_interpreter',
+              },
+            ],
+          },
+        ],
+        metadata: {},
+        files: [
+          {
+            object: 'file',
+            id: 'file-XXXXXXXXXXXXXXXXXXXX',
+            purpose: 'assistants_output',
+            filename: 'dummy_data1.csv',
+            bytes: 1925,
+            created_at: 1722983746,
+            status: 'processed',
+            status_details: null,
+            type: 'text/csv',
+            file_id: 'file-XXXXXXXXXXXXXXXXXXXX',
+            filepath:
+              'https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-XXXXXXXXXXXXXXXXXXXX/dummy_data1.csv',
+            usage: 1,
+            user: 'XXXXXXXXXXXXXXXXXXXX',
+            context: 'assistants_output',
+            source: 'openai',
+            model: 'gpt-4o-mini',
+          },
+          {
+            object: 'file',
+            id: 'file-YYYYYYYYYYYYYYYYYYYY',
+            purpose: 'assistants_output',
+            filename: 'dummy_data2.csv',
+            bytes: 4221,
+            created_at: 1722983746,
+            status: 'processed',
+            status_details: null,
+            type: 'text/csv',
+            file_id: 'file-YYYYYYYYYYYYYYYYYYYY',
+            filepath:
+              'https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-YYYYYYYYYYYYYYYYYYYY/dummy_data2.csv',
+            usage: 1,
+            user: 'XXXXXXXXXXXXXXXXXXXX',
+            context: 'assistants_output',
+            source: 'openai',
+            model: 'gpt-4o-mini',
+          },
+          {
+            object: 'file',
+            id: 'file-ZZZZZZZZZZZZZZZZZZZZ',
+            purpose: 'assistants_output',
+            filename: 'dummy_data3.csv',
+            bytes: 3534,
+            created_at: 1722983747,
+            status: 'processed',
+            status_details: null,
+            type: 'text/csv',
+            file_id: 'file-ZZZZZZZZZZZZZZZZZZZZ',
+            filepath:
+              'https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-ZZZZZZZZZZZZZZZZZZZZ/dummy_data3.csv',
+            usage: 1,
+            user: 'XXXXXXXXXXXXXXXXXXXX',
+            context: 'assistants_output',
+            source: 'openai',
+            model: 'gpt-4o-mini',
+          },
+        ],
+      },
+    ];
+
+    const mockClient = {
+      processedFileIds: new Set(),
+    };
+
+    // Mock the retrieveAndProcessFile function for each file
+    retrieveAndProcessFile.mockImplementation(({ file_id }) => {
+      const fileMap = { // keyed by the same file_ids used in the annotations above
+        'file-XXXXXXXXXXXXXXXXXXXX': {
+          filename: 'dummy_data1.csv',
+          filepath:
+            'https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-XXXXXXXXXXXXXXXXXXXX/dummy_data1.csv',
+        },
+        'file-YYYYYYYYYYYYYYYYYYYY': {
+          filename: 'dummy_data2.csv',
+          filepath:
+            'https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-YYYYYYYYYYYYYYYYYYYY/dummy_data2.csv',
+        },
+        'file-ZZZZZZZZZZZZZZZZZZZZ': {
+          filename: 'dummy_data3.csv',
+          filepath:
+            'https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-ZZZZZZZZZZZZZZZZZZZZ/dummy_data3.csv',
+        },
+      };
+
+      return Promise.resolve(fileMap[file_id]); // resolves undefined for a file_id that is not in fileMap
+    });
+
+    const result = await processMessages({ openai: {}, client: mockClient, messages });
+
+    const expectedText = // each sandbox: URL is swapped for the matching stubbed filepath; the link labels are unchanged
+      'I have generated three dummy CSV files for you. You can download them using the links below:\n\n1. [Download Dummy Data 1](https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-XXXXXXXXXXXXXXXXXXXX/dummy_data1.csv)\n2. [Download Dummy Data 2](https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-YYYYYYYYYYYYYYYYYYYY/dummy_data2.csv)\n3. [Download Dummy Data 3](https://api.openai.com/v1/files/XXXXXXXXXXXXXXXXXXXX/file-ZZZZZZZZZZZZZZZZZZZZ/dummy_data3.csv)';
+
+    expect(result.text).toBe(expectedText);
+    expect(result.edited).toBe(true);
+  });
+});