🪐 feat: Cross-replica support in GenerationJobManager for Redis mode (#11233)
Some checks are pending
Docker Dev Branch Images Build / build (Dockerfile, lc-dev, node) (push) Waiting to run
Docker Dev Branch Images Build / build (Dockerfile.multi, lc-dev-api, api-build) (push) Waiting to run

* feat: Implement cross-replica support in GenerationJobManager for Redis mode

- Enhanced GenerationJobManager to support cross-replica scenarios by lazily creating runtime state from Redis when a job exists in Redis but is not present in the local replica's memory.
- Added functionality to persist `syncSent` and `finalEvent` states to Redis for consistency across replicas.
- Implemented abort signal handling to allow replicas to receive and respond to abort requests from other instances, ensuring proper job termination.
- Updated tests to validate cross-replica behavior, including job retrieval, subscription, and abort signal propagation.

This update improves the robustness and reliability of job management in distributed environments.

* fix: Enhance error handling and implement abort signal for cross-replica jobs in GenerationJobManager

- Added error handling for Redis job updates in GenerationJobManager to log failures when persisting `syncSent` and `finalEvent` states.
- Implemented a listener for cross-replica abort signals, ensuring that lazily-initialized jobs can respond to abort requests from other replicas.
- Introduced a new integration test to validate the handling of abort signals for lazily-initialized jobs across replicas.

These changes improve the reliability and robustness of job management in distributed environments.
This commit is contained in:
Danny Avila 2026-01-06 11:39:24 -05:00 committed by GitHub
parent b5aa38ff33
commit a7645f4705
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 826 additions and 7 deletions

View file

@ -17,6 +17,7 @@ const EventTypes = {
CHUNK: 'chunk',
DONE: 'done',
ERROR: 'error',
ABORT: 'abort',
} as const;
interface PubSubMessage {
@ -39,6 +40,8 @@ interface StreamSubscribers {
}
>;
allSubscribersLeftCallbacks: Array<() => void>;
/** Abort callbacks - called when abort signal is received from any replica */
abortCallbacks: Array<() => void>;
}
/**
@ -119,6 +122,20 @@ export class RedisEventTransport implements IEventTransport {
case EventTypes.ERROR:
handlers.onError?.(parsed.error ?? 'Unknown error');
break;
case EventTypes.ABORT:
// Abort is handled at stream level, not per-handler
break;
}
}
// Handle abort signals at stream level (not per-handler)
if (parsed.type === EventTypes.ABORT) {
for (const callback of streamState.abortCallbacks) {
try {
callback();
} catch (err) {
logger.error(`[RedisEventTransport] Error in abort callback:`, err);
}
}
}
} catch (err) {
@ -149,6 +166,7 @@ export class RedisEventTransport implements IEventTransport {
count: 0,
handlers: new Map(),
allSubscribersLeftCallbacks: [],
abortCallbacks: [],
});
}
@ -263,6 +281,53 @@ export class RedisEventTransport implements IEventTransport {
count: 0,
handlers: new Map(),
allSubscribersLeftCallbacks: [callback],
abortCallbacks: [],
});
}
}
/**
 * Broadcast an abort signal for the given stream.
 *
 * An `abort`-typed message is published on the stream's events channel so
 * that every replica subscribed to it is notified. This is what makes a
 * cross-replica abort possible: a user aborts on Replica B, and the
 * generating Replica A receives the signal and stops.
 *
 * The publish is fire-and-forget: a failure is logged, never thrown.
 *
 * @param streamId - The stream identifier
 */
emitAbort(streamId: string): void {
  const target = CHANNELS.events(streamId);
  const payload: PubSubMessage = { type: EventTypes.ABORT };

  // Named handler keeps the publish call on one line; it only logs the failure.
  const reportFailure = (err: unknown): void => {
    logger.error('[RedisEventTransport] Failed to publish abort:', err);
  };

  void this.publisher.publish(target, JSON.stringify(payload)).catch(reportFailure);
}
/**
 * Register a callback for abort signals on a stream.
 * Called when abort is triggered on any replica (including this one).
 *
 * If the stream has no local state yet, an empty state is created so the
 * callback has somewhere to live. The stream's events channel is subscribed
 * on first use.
 *
 * If that subscription fails, the channel is un-marked as subscribed so a
 * later registration for the same stream can retry. Without this, the
 * channel would be treated as subscribed forever and the abort callbacks
 * would never fire.
 *
 * @param streamId - The stream identifier
 * @param callback - Called when abort signal is received
 */
onAbort(streamId: string, callback: () => void): void {
  const channel = CHANNELS.events(streamId);

  let state = this.streams.get(streamId);
  if (!state) {
    state = {
      count: 0,
      handlers: new Map(),
      allSubscribersLeftCallbacks: [],
      abortCallbacks: [],
    };
    this.streams.set(streamId, state);
  }
  state.abortCallbacks.push(callback);

  // Nothing more to do when the channel is already (or is being) subscribed.
  if (this.subscribedChannels.has(channel)) {
    return;
  }

  // Mark first so concurrent registrations do not issue duplicate subscribes.
  this.subscribedChannels.add(channel);
  this.subscriber.subscribe(channel).catch((err) => {
    // Roll back the marker: the channel is NOT subscribed, so a later
    // registration must be allowed to try again.
    this.subscribedChannels.delete(channel);
    logger.error(`[RedisEventTransport] Failed to subscribe to ${channel}:`, err);
  });
}
@ -282,9 +347,10 @@ export class RedisEventTransport implements IEventTransport {
const state = this.streams.get(streamId);
if (state) {
// Clear all handlers
// Clear all handlers and callbacks
state.handlers.clear();
state.allSubscribersLeftCallbacks = [];
state.abortCallbacks = [];
}
// Unsubscribe from Redis channel