🔥 feat: deepseek-reasoner Thought Streaming (#5379)

* 🔧 refactor: Remove unused penalties and enhance reasoning token handling in OpenAIClient
* 🔧 refactor: `addInstructions` default to adding instructions at index 0, flag for legacy behavior
* chore: remove long placeholder
* chore: update localization strings across multiple languages
* ci: adjust tests for new `addInstructions` behavior
2026-02-01 23:31:51 +01:00 · 2025-01-20 18:21:18 -05:00 · 2025-01-20 18:21:18 -05:00 · d6b4d83b68
commit d6b4d83b68
parent 79585e22d2
14 changed files with 67 additions and 18 deletions
--- a/api/app/clients/BaseClient.js
+++ b/api/app/clients/BaseClient.js
@ -264,17 +264,24 @@ class BaseClient {
  /**
   * Adds instructions to the messages array. If the instructions object is empty or undefined,
   * the original messages array is returned. Otherwise, the instructions are added to the messages
-   * array, preserving the last message at the end.
+   * array either at the beginning (default) or immediately before the last message.
   *
   * @param {Array} messages - An array of messages.
   * @param {Object} instructions - An object containing instructions to be added to the messages.
+   * @param {boolean} [beforeLast=false] - If true, adds instructions before the last message; if false, adds at the beginning.
   * @returns {Array} An array containing messages and instructions, or the original messages if instructions are empty.
   */
-  addInstructions(messages, instructions) {
-    const payload = [];
+  addInstructions(messages, instructions, beforeLast = false) {
    if (!instructions || Object.keys(instructions).length === 0) {
      return messages;
    }
+
+    if (!beforeLast) {
+      return [instructions, ...messages];
+    }
+
+    // Legacy behavior: add instructions before the last message
+    const payload = [];
    if (messages.length > 1) {
      payload.push(...messages.slice(0, -1));
    }
--- a/api/app/clients/OpenAIClient.js
+++ b/api/app/clients/OpenAIClient.js
@ -614,8 +614,6 @@ class OpenAIClient extends BaseClient {
    model = 'gpt-4o-mini',
    modelName,
    temperature = 0.2,
-    presence_penalty = 0,
-    frequency_penalty = 0,
    max_tokens,
    streaming,
    context,
@ -626,8 +624,6 @@ class OpenAIClient extends BaseClient {
    const modelOptions = {
      modelName: modelName ?? model,
      temperature,
-      presence_penalty,
-      frequency_penalty,
      user: this.user,
    };

@ -1065,6 +1061,7 @@ ${convo}
    let error = null;
    const errorCallback = (err) => (error = err);
    const intermediateReply = [];
+    const reasoningTokens = [];
    try {
      if (!abortController) {
        abortController = new AbortController();
@ -1292,8 +1289,23 @@ ${convo}
            }
          });

+        let reasoningCompleted = false;
        for await (const chunk of stream) {
+          if (chunk?.choices?.[0]?.delta?.reasoning_content) {
+            const reasoning_content = chunk?.choices?.[0]?.delta?.reasoning_content || '';
+            intermediateReply.push(reasoning_content);
+            reasoningTokens.push(reasoning_content);
+            onProgress(reasoning_content);
+          }
+
          const token = chunk?.choices?.[0]?.delta?.content || '';
+          if (!reasoningCompleted && reasoningTokens.length > 0 && token) {
+            reasoningCompleted = true;
+            const separatorTokens = '\n\n---\n';
+            reasoningTokens.push(separatorTokens);
+            onProgress(separatorTokens);
+          }
+
          intermediateReply.push(token);
          onProgress(token);
          if (abortController.signal.aborted) {
@ -1360,6 +1372,10 @@ ${convo}
        return reply;
      }

+      if (reasoningTokens.length > 0) {
+        return reasoningTokens.join('') + message.content;
+      }
+
      return message.content;
    } catch (err) {
      if (
--- a/api/app/clients/specs/BaseClient.test.js
+++ b/api/app/clients/specs/BaseClient.test.js
@ -88,6 +88,19 @@ describe('BaseClient', () => {
    const messages = [{ content: 'Hello' }, { content: 'How are you?' }, { content: 'Goodbye' }];
    const instructions = { content: 'Please respond to the question.' };
    const result = TestClient.addInstructions(messages, instructions);
+    const expected = [
+      { content: 'Please respond to the question.' },
+      { content: 'Hello' },
+      { content: 'How are you?' },
+      { content: 'Goodbye' },
+    ];
+    expect(result).toEqual(expected);
+  });
+
+  test('returns the input messages with instructions properly added when addInstructions() with legacy flag', () => {
+    const messages = [{ content: 'Hello' }, { content: 'How are you?' }, { content: 'Goodbye' }];
+    const instructions = { content: 'Please respond to the question.' };
+    const result = TestClient.addInstructions(messages, instructions, true);
    const expected = [
      { content: 'Hello' },
      { content: 'How are you?' },