🏁 fix: Message Race Condition if Cancelled Early (#11462)

* 🔧 fix: Prevent race conditions in message saving during abort scenarios

* Added logic to save partial responses before returning from the abort endpoint to ensure parentMessageId exists in the database.
* Updated the ResumableAgentController to save response messages before emitting final events, preventing orphaned parentMessageIds.
* Enhanced handling of unfinished responses to improve stability and data integrity in agent interactions.

* 🔧 fix: logging and job replacement handling in ResumableAgentController

* Added detailed logging for job creation and final event emissions to improve traceability.
* Implemented logic to check for job replacement before emitting events, preventing stale requests from affecting newer jobs.
* Updated abort handling to log additional context about the abort result, enhancing debugging capabilities.

* refactor: abort handling and token spending logic in AgentStream

* Added authorization check for abort attempts to prevent unauthorized access.
* Improved response message saving logic to ensure valid message IDs are stored.
* Implemented token spending for aborted requests to prevent double-spending across parallel agents.
* Enhanced logging for better traceability of token spending operations during abort scenarios.

* refactor: remove TODO comments for token spending in abort handling

* Removed outdated TODO comments regarding token spending for aborted requests in the abort endpoint.
* This change streamlines the code and clarifies the current implementation status.

*  test: Add comprehensive tests for job replacement and abort handling

* Introduced unit tests for job replacement detection in ResumableAgentController, covering job creation timestamp tracking, stale job detection, and response message saving order.
* Added tests for the agent abort endpoint, ensuring proper authorization checks, early abort handling, and partial response saving.
* Enhanced logging and error handling in tests to improve traceability and robustness of the abort functionality.
This commit is contained in:
Danny Avila 2026-01-21 13:57:12 -05:00 committed by GitHub
parent dea246934e
commit 11210d8b98
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 682 additions and 12 deletions

View file

@ -0,0 +1,301 @@
/**
* Tests for the agent abort endpoint
*
* Tests the following fixes from PR #11462:
* 1. Authorization check - only job owner can abort
* 2. Early abort handling - skip save when no responseMessageId
* 3. Partial response saving - save message before returning
*/
const express = require('express');
const request = require('supertest');
const mockLogger = {
debug: jest.fn(),
warn: jest.fn(),
error: jest.fn(),
info: jest.fn(),
};
const mockGenerationJobManager = {
getJob: jest.fn(),
abortJob: jest.fn(),
getActiveJobIdsForUser: jest.fn(),
};
const mockSaveMessage = jest.fn();
jest.mock('@librechat/data-schemas', () => ({
logger: mockLogger,
}));
jest.mock('@librechat/api', () => ({
isEnabled: jest.fn().mockReturnValue(false),
GenerationJobManager: mockGenerationJobManager,
}));
jest.mock('~/models', () => ({
saveMessage: (...args) => mockSaveMessage(...args),
}));
jest.mock('~/server/middleware', () => ({
uaParser: (req, res, next) => next(),
checkBan: (req, res, next) => next(),
requireJwtAuth: (req, res, next) => {
req.user = { id: 'test-user-123' };
next();
},
messageIpLimiter: (req, res, next) => next(),
configMiddleware: (req, res, next) => next(),
messageUserLimiter: (req, res, next) => next(),
}));
// Mock the chat module - needs to be a router
jest.mock('~/server/routes/agents/chat', () => require('express').Router());
// Mock the v1 module - v1 is directly used as middleware
jest.mock('~/server/routes/agents/v1', () => ({
v1: require('express').Router(),
}));
// Import after mocks
const agentRoutes = require('~/server/routes/agents/index');
describe('Agent Abort Endpoint', () => {
let app;
beforeAll(() => {
app = express();
app.use(express.json());
app.use('/api/agents', agentRoutes);
});
beforeEach(() => {
jest.clearAllMocks();
});
describe('POST /chat/abort', () => {
describe('Authorization', () => {
it("should return 403 when user tries to abort another user's job", async () => {
const jobStreamId = 'test-stream-123';
mockGenerationJobManager.getJob.mockResolvedValue({
metadata: { userId: 'other-user-456' },
});
const response = await request(app)
.post('/api/agents/chat/abort')
.send({ conversationId: jobStreamId });
expect(response.status).toBe(403);
expect(response.body).toEqual({ error: 'Unauthorized' });
expect(mockLogger.warn).toHaveBeenCalledWith(
expect.stringContaining('Unauthorized abort attempt'),
);
expect(mockGenerationJobManager.abortJob).not.toHaveBeenCalled();
});
it('should allow abort when user owns the job', async () => {
const jobStreamId = 'test-stream-123';
mockGenerationJobManager.getJob.mockResolvedValue({
metadata: { userId: 'test-user-123' },
});
mockGenerationJobManager.abortJob.mockResolvedValue({
success: true,
jobData: null,
content: [],
text: '',
});
const response = await request(app)
.post('/api/agents/chat/abort')
.send({ conversationId: jobStreamId });
expect(response.status).toBe(200);
expect(response.body).toEqual({ success: true, aborted: jobStreamId });
expect(mockGenerationJobManager.abortJob).toHaveBeenCalledWith(jobStreamId);
});
it('should allow abort when job has no userId metadata (backwards compatibility)', async () => {
const jobStreamId = 'test-stream-123';
mockGenerationJobManager.getJob.mockResolvedValue({
metadata: {},
});
mockGenerationJobManager.abortJob.mockResolvedValue({
success: true,
jobData: null,
content: [],
text: '',
});
const response = await request(app)
.post('/api/agents/chat/abort')
.send({ conversationId: jobStreamId });
expect(response.status).toBe(200);
expect(response.body).toEqual({ success: true, aborted: jobStreamId });
});
});
describe('Early Abort Handling', () => {
it('should skip message saving when responseMessageId is missing (early abort)', async () => {
const jobStreamId = 'test-stream-123';
mockGenerationJobManager.getJob.mockResolvedValue({
metadata: { userId: 'test-user-123' },
});
mockGenerationJobManager.abortJob.mockResolvedValue({
success: true,
jobData: {
userMessage: { messageId: 'user-msg-123' },
// No responseMessageId - early abort before generation started
conversationId: jobStreamId,
},
content: [],
text: '',
});
const response = await request(app)
.post('/api/agents/chat/abort')
.send({ conversationId: jobStreamId });
expect(response.status).toBe(200);
expect(mockSaveMessage).not.toHaveBeenCalled();
});
it('should skip message saving when userMessage is missing', async () => {
const jobStreamId = 'test-stream-123';
mockGenerationJobManager.getJob.mockResolvedValue({
metadata: { userId: 'test-user-123' },
});
mockGenerationJobManager.abortJob.mockResolvedValue({
success: true,
jobData: {
// No userMessage
responseMessageId: 'response-msg-123',
conversationId: jobStreamId,
},
content: [],
text: '',
});
const response = await request(app)
.post('/api/agents/chat/abort')
.send({ conversationId: jobStreamId });
expect(response.status).toBe(200);
expect(mockSaveMessage).not.toHaveBeenCalled();
});
});
describe('Partial Response Saving', () => {
it('should save partial response when both userMessage and responseMessageId exist', async () => {
const jobStreamId = 'test-stream-123';
const userMessageId = 'user-msg-123';
const responseMessageId = 'response-msg-456';
mockGenerationJobManager.getJob.mockResolvedValue({
metadata: { userId: 'test-user-123' },
});
mockGenerationJobManager.abortJob.mockResolvedValue({
success: true,
jobData: {
userMessage: { messageId: userMessageId },
responseMessageId,
conversationId: jobStreamId,
sender: 'TestAgent',
endpoint: 'anthropic',
model: 'claude-3',
},
content: [{ type: 'text', text: 'Partial response...' }],
text: 'Partial response...',
});
mockSaveMessage.mockResolvedValue();
const response = await request(app)
.post('/api/agents/chat/abort')
.send({ conversationId: jobStreamId });
expect(response.status).toBe(200);
expect(mockSaveMessage).toHaveBeenCalledWith(
expect.anything(),
expect.objectContaining({
messageId: responseMessageId,
parentMessageId: userMessageId,
conversationId: jobStreamId,
content: [{ type: 'text', text: 'Partial response...' }],
text: 'Partial response...',
sender: 'TestAgent',
endpoint: 'anthropic',
model: 'claude-3',
unfinished: true,
error: false,
isCreatedByUser: false,
user: 'test-user-123',
}),
expect.objectContaining({
context: 'api/server/routes/agents/index.js - abort endpoint',
}),
);
});
it('should handle saveMessage errors gracefully', async () => {
const jobStreamId = 'test-stream-123';
mockGenerationJobManager.getJob.mockResolvedValue({
metadata: { userId: 'test-user-123' },
});
mockGenerationJobManager.abortJob.mockResolvedValue({
success: true,
jobData: {
userMessage: { messageId: 'user-msg-123' },
responseMessageId: 'response-msg-456',
conversationId: jobStreamId,
},
content: [],
text: '',
});
mockSaveMessage.mockRejectedValue(new Error('Database error'));
const response = await request(app)
.post('/api/agents/chat/abort')
.send({ conversationId: jobStreamId });
// Should still return success even if save fails
expect(response.status).toBe(200);
expect(response.body).toEqual({ success: true, aborted: jobStreamId });
expect(mockLogger.error).toHaveBeenCalledWith(
expect.stringContaining('Failed to save partial response'),
);
});
});
describe('Job Not Found', () => {
it('should return 404 when job is not found', async () => {
mockGenerationJobManager.getJob.mockResolvedValue(null);
mockGenerationJobManager.getActiveJobIdsForUser.mockResolvedValue([]);
const response = await request(app)
.post('/api/agents/chat/abort')
.send({ conversationId: 'non-existent-job' });
expect(response.status).toBe(404);
expect(response.body).toEqual({
error: 'Job not found',
streamId: 'non-existent-job',
});
});
});
});
});

View file

@ -9,6 +9,7 @@ const {
configMiddleware,
messageUserLimiter,
} = require('~/server/middleware');
const { saveMessage } = require('~/models');
const { v1 } = require('./v1');
const chat = require('./chat');
@ -194,9 +195,53 @@ router.post('/chat/abort', async (req, res) => {
logger.debug(`[AgentStream] Computed jobStreamId: ${jobStreamId}`);
if (job && jobStreamId) {
if (job.metadata?.userId && job.metadata.userId !== userId) {
logger.warn(`[AgentStream] Unauthorized abort attempt for ${jobStreamId} by user ${userId}`);
return res.status(403).json({ error: 'Unauthorized' });
}
logger.debug(`[AgentStream] Job found, aborting: ${jobStreamId}`);
await GenerationJobManager.abortJob(jobStreamId);
logger.debug(`[AgentStream] Job aborted successfully: ${jobStreamId}`);
const abortResult = await GenerationJobManager.abortJob(jobStreamId);
logger.debug(`[AgentStream] Job aborted successfully: ${jobStreamId}`, {
abortResultSuccess: abortResult.success,
abortResultUserMessageId: abortResult.jobData?.userMessage?.messageId,
abortResultResponseMessageId: abortResult.jobData?.responseMessageId,
});
// CRITICAL: Save partial response BEFORE returning to prevent race condition.
// If user sends a follow-up immediately after abort, the parentMessageId must exist in DB.
// Only save if we have a valid responseMessageId (skip early aborts before generation started)
if (
abortResult.success &&
abortResult.jobData?.userMessage?.messageId &&
abortResult.jobData?.responseMessageId
) {
const { jobData, content, text } = abortResult;
const responseMessage = {
messageId: jobData.responseMessageId,
parentMessageId: jobData.userMessage.messageId,
conversationId: jobData.conversationId,
content: content || [],
text: text || '',
sender: jobData.sender || 'AI',
endpoint: jobData.endpoint,
model: jobData.model,
unfinished: true,
error: false,
isCreatedByUser: false,
user: userId,
};
try {
await saveMessage(req, responseMessage, {
context: 'api/server/routes/agents/index.js - abort endpoint',
});
logger.debug(`[AgentStream] Saved partial response for: ${jobStreamId}`);
} catch (saveError) {
logger.error(`[AgentStream] Failed to save partial response: ${saveError.message}`);
}
}
return res.json({ success: true, aborted: jobStreamId });
}