🖼️ fix: correct image extraction (#3538)

* fix: regex issue extracting text with images in markdown
* fix: update addImages function to include only the first observed image path in the response message
* ci: tests for addImages function: correct image extraction
* fix(GoogleClient): linting

---------

Co-authored-by: Dongwoo Jeong <dongwoo.jeong@lge.com>
Co-authored-by: Dongwoo Jeong <dongwoo@duck.com>
2025-12-17 17:00:15 +01:00 · 2024-08-04 20:53:11 -04:00 · 2024-08-04 20:53:11 -04:00 · 5baa95bd9a
commit 5baa95bd9a
parent 389b2a6cab
3 changed files with 86 additions and 26 deletions
--- a/api/app/clients/GoogleClient.js
+++ b/api/app/clients/GoogleClient.js
@@ -879,7 +879,8 @@ class GoogleClient extends BaseClient {
    return [
      {
        category: 'HARM_CATEGORY_SEXUALLY_EXPLICIT',
-        threshold: process.env.GOOGLE_SAFETY_SEXUALLY_EXPLICIT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
+        threshold:
          process.env.GOOGLE_SAFETY_SEXUALLY_EXPLICIT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
      },
      {
        category: 'HARM_CATEGORY_HATE_SPEECH',
@@ -891,7 +892,8 @@ class GoogleClient extends BaseClient {
      },
      {
        category: 'HARM_CATEGORY_DANGEROUS_CONTENT',
-        threshold: process.env.GOOGLE_SAFETY_DANGEROUS_CONTENT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
+        threshold:
          process.env.GOOGLE_SAFETY_DANGEROUS_CONTENT || 'HARM_BLOCK_THRESHOLD_UNSPECIFIED',
      },
    ];
  }
--- a/api/app/clients/output_parsers/addImages.js
+++ b/api/app/clients/output_parsers/addImages.js
@@ -60,10 +60,10 @@ function addImages(intermediateSteps, responseMessage) {
    if (!observation || !observation.includes('![')) {
      return;
    }
-    const observedImagePath = observation.match(/!\[.*\]\([^)]*\)/g);
+    const observedImagePath = observation.match(/!\[[^(]*\]\([^)]*\)/g);
    if (observedImagePath && !responseMessage.text.includes(observedImagePath[0])) {
-      responseMessage.text += '\n' + observation;
+      responseMessage.text += '\n' + observedImagePath[0];
-      logger.debug('[addImages] added image from intermediateSteps:', observation);
+      logger.debug('[addImages] added image from intermediateSteps:', observedImagePath[0]);
    }
  });
 }
--- a/api/app/clients/output_parsers/addImages.spec.js
+++ b/api/app/clients/output_parsers/addImages.spec.js
@@ -81,4 +81,62 @@ describe('addImages', () => {
    addImages(intermediateSteps, responseMessage);
    expect(responseMessage.text).toBe(`${originalText}\n${imageMarkdown}`);
  });
  // Regression test: an observation holding three image markdowns separated by
  // plain-text lines must contribute only image markdown to the response, not
  // the text in between.
  // NOTE(review): `intermediateSteps` and `responseMessage` come from setup
  // outside this view; the expected value implies `responseMessage.text` starts
  // as an empty string — confirm against the suite's setup.
  it('should extract only image markdowns when there is text between them', () => {
    const markdownWithTextBetweenImages = `
      ![image1](/images/image1.png)
      Some text between images that should not be included.
      ![image2](/images/image2.png)
      More text that should be ignored.
      ![image3](/images/image3.png)
    `;
    intermediateSteps.push({ observation: markdownWithTextBetweenImages });
    addImages(intermediateSteps, responseMessage);
    // Only the first image is expected, prefixed by a newline; image2/image3
    // and the surrounding text must be absent.
    expect(responseMessage.text).toBe('\n![image1](/images/image1.png)');
  });
  // Verifies that when one observation lists several image markdowns on
  // consecutive lines, only the first one ends up in the response text.
  // NOTE(review): assumes `responseMessage.text` is empty before this test
  // runs (setup is outside this view) — confirm.
  it('should only return the first image when multiple images are present', () => {
    const markdownWithMultipleImages = `
      ![image1](/images/image1.png)
      ![image2](/images/image2.png)
      ![image3](/images/image3.png)
    `;
    intermediateSteps.push({ observation: markdownWithMultipleImages });
    addImages(intermediateSteps, responseMessage);
    // Exact match: a newline followed by image1 only.
    expect(responseMessage.text).toBe('\n![image1](/images/image1.png)');
  });
  // Verifies that non-image lines before and after the image (title/author
  // lines, trailing content, a bracketed list of numbers) are not copied into
  // the response text.
  // NOTE(review): assumes `responseMessage.text` is empty before this test
  // runs (setup is outside this view) — confirm.
  it('should not include any text or metadata surrounding the image markdown', () => {
    const markdownWithMetadata = `
      Title: Test Document
      Author: John Doe
      ![image1](/images/image1.png)
      Some content after the image.
      Vector values: [0.1, 0.2, 0.3]
    `;
    intermediateSteps.push({ observation: markdownWithMetadata });
    addImages(intermediateSteps, responseMessage);
    // The "[0.1, 0.2, 0.3]" brackets are not preceded by "!" and must not be
    // treated as part of an image; only image1 is expected.
    expect(responseMessage.text).toBe('\n![image1](/images/image1.png)');
  });
  // Verifies the same first-image-only behavior on a larger document that
  // mixes headings, paragraphs, and three images spread across sections.
  // NOTE(review): assumes `responseMessage.text` is empty before this test
  // runs (setup is outside this view) — confirm.
  it('should handle complex markdown with multiple images and only return the first one', () => {
    const complexMarkdown = `
      # Document Title
      ## Section 1
      Here's some text with an embedded image:
      ![image1](/images/image1.png)
      ## Section 2
      More text here...
      ![image2](/images/image2.png)
      ### Subsection
      Even more content
      ![image3](/images/image3.png)
    `;
    intermediateSteps.push({ observation: complexMarkdown });
    addImages(intermediateSteps, responseMessage);
    // Headings and body text are dropped; only the first image markdown,
    // prefixed by a newline, is expected.
    expect(responseMessage.text).toBe('\n![image1](/images/image1.png)');
  });
 });