⚙️ feat: Adjust Rate of Stream Progress (#3244)

* chore: bump data-provider and add MESSAGES CacheKey
* refactor: avoid saving messages while streaming, save partial text to cache instead
* fix(ci): processChunks
* chore: logging aborted request to debug
* feat: set stream rate for token processing
* chore: specify default stream rate
* fix(ci): Update AppService.js to use optional chaining for endpointLocals assignment
* refactor: abstract the error handler
* feat: streamRate for assistants; refactor: update default rate for token
* refactor: update error handling in assistants/errors.js
* refactor: update error handling in assistants/errors.js
2025-12-31 23:58:50 +01:00 · 2024-07-17 10:47:17 -04:00 · 2024-07-17 10:47:17 -04:00 · 5d40d0a37a
commit 5d40d0a37a
parent 1c282d1517
29 changed files with 661 additions and 309 deletions
--- a/api/app/clients/OpenAIClient.js
+++ b/api/app/clients/OpenAIClient.js
@@ -1182,8 +1182,10 @@ ${convo}
        });
      }

+      const streamRate = this.options.streamRate ?? Constants.DEFAULT_STREAM_RATE;
+
      if (this.message_file_map && this.isOllama) {
-        const ollamaClient = new OllamaClient({ baseURL });
+        const ollamaClient = new OllamaClient({ baseURL, streamRate });
        return await ollamaClient.chatCompletion({
          payload: modelOptions,
          onProgress,
@@ -1221,8 +1223,6 @@ ${convo}
            }
          });

-        const azureDelay = this.modelOptions.model?.includes('gpt-4') ? 30 : 17;
-
        for await (const chunk of stream) {
          const token = chunk.choices[0]?.delta?.content || '';
          intermediateReply += token;
@@ -1232,9 +1232,7 @@ ${convo}
            break;
          }

-          if (this.azure) {
-            await sleep(azureDelay);
-          }
+          await sleep(streamRate);
        }

        if (!UnexpectedRoleError) {