fix: recover from missing/deleted agents during session init (#431)
This commit is contained in:
@@ -60,6 +60,30 @@ function isConversationMissingError(error: unknown): boolean {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
 * Detect whether a session-initialization error says the agent does not exist.
 *
 * The SDK includes CLI stderr in the error message when the subprocess exits
 * before sending an init message, so the patterns below are matched against
 * the whole lowercased message: both the SDK-level text and any stderr output
 * embedded in it.
 *
 * This intentionally does NOT treat generic init failures (like "no init
 * message received") as recoverable. Those can be transient SDK/process
 * issues, and clearing persisted agent state in those cases can destroy
 * valid mappings.
 *
 * @param error - The value caught from session initialization. `Error`
 *   instances, error-like objects carrying a string `message`, and bare
 *   strings are inspected; any other value is never a match.
 * @returns `true` only when the message explicitly names a missing agent.
 */
function isAgentMissingFromInitError(error: unknown): boolean {
  // Pull the text out without relying on `instanceof Error`, which fails for
  // thrown strings and for error objects that are not instances of this
  // realm's Error constructor.
  let rawMessage: unknown;
  if (typeof error === 'string') {
    rawMessage = error;
  } else if (typeof error === 'object' && error !== null) {
    rawMessage = (error as { message?: unknown }).message;
  }
  if (typeof rawMessage !== 'string') return false;

  const msg = rawMessage.toLowerCase();

  // "agent" and the failure phrase must appear within 80 characters of each
  // other in the same sentence/line (no "." or newline in between), so an
  // unrelated "not found" elsewhere in the output does not trigger recovery.
  const agentMissingPatterns = [
    /\bagent\b[^.\n]{0,80}\bnot found\b/,
    /\bnot found\b[^.\n]{0,80}\bagent\b/,
    /\bagent\b[^.\n]{0,80}\bdoes not exist\b/,
    /\bunknown agent\b/,
    /\bagent_not_found\b/,
  ];

  return agentMissingPatterns.some((pattern) => pattern.test(msg));
}
|
||||
|
||||
/**
|
||||
* Map a structured API error into a clear, user-facing message.
|
||||
* The `error` object comes from the SDK's new SDKErrorMessage type.
|
||||
@@ -1005,6 +1029,20 @@ export class LettaBot implements AgentSession {
|
||||
} catch (error) {
|
||||
// Close immediately so failed initialization cannot leak a subprocess.
|
||||
session.close();
|
||||
|
||||
// If the stored agent ID doesn't exist on the server (deleted externally,
|
||||
// ghost agent from failed pairing, etc.), clear the stale ID and retry.
|
||||
// The retry will hit the "else" branch and create a fresh agent.
|
||||
// Uses bootstrapRetried to prevent infinite recursion if creation also fails.
|
||||
if (this.store.agentId && !bootstrapRetried && isAgentMissingFromInitError(error)) {
|
||||
log.warn(
|
||||
`Agent ${this.store.agentId} appears missing from server, ` +
|
||||
`clearing stale agent ID and recreating...`,
|
||||
);
|
||||
this.store.clearAgent();
|
||||
return this._createSessionForKey(key, /* bootstrapRetried */ true, generation);
|
||||
}
|
||||
|
||||
throw error;
|
||||
}
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ vi.mock('@letta-ai/letta-code-sdk', () => ({
|
||||
}));
|
||||
|
||||
vi.mock('../tools/letta-api.js', () => ({
|
||||
updateAgentName: vi.fn(),
|
||||
updateAgentName: vi.fn().mockResolvedValue(undefined),
|
||||
getPendingApprovals: vi.fn(),
|
||||
rejectApproval: vi.fn(),
|
||||
cancelRuns: vi.fn(),
|
||||
@@ -186,6 +186,95 @@ describe('SDK session contract', () => {
|
||||
expect(mockSession.close).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
|
||||
it('recreates agent after explicit agent-not-found initialize error', async () => {
  // Remove LETTA_AGENT_ID from the environment for this scenario.
  delete process.env.LETTA_AGENT_ID;

  const missingAgentId = 'agent-contract-test';
  const recreatedAgentId = 'agent-recreated';

  // Stream that only reports a successful result.
  async function* resultOnly() {
    yield { type: 'result', success: true };
  }

  // Stream that emits one assistant message followed by a successful result.
  async function* replyThenResult() {
    yield { type: 'assistant', content: 'fresh response' };
    yield { type: 'result', success: true };
  }

  // First session handed out: initialize() rejects with an explicit
  // agent-not-found detail embedded in the stderr portion of the message.
  const deletedAgentSession = {
    initialize: vi.fn(async () => {
      throw new Error('No init message received from subprocess. stderr: {"detail":"Agent agent-contract-test not found"}');
    }),
    send: vi.fn(async (_message: unknown) => undefined),
    stream: vi.fn(() => resultOnly()),
    close: vi.fn(() => undefined),
    agentId: missingAgentId,
    conversationId: 'conv-stale',
  };

  // Second session handed out: initializes cleanly and answers the message.
  const freshSession = {
    initialize: vi.fn(async () => undefined),
    send: vi.fn(async (_message: unknown) => undefined),
    stream: vi.fn(() => replyThenResult()),
    close: vi.fn(() => undefined),
    agentId: recreatedAgentId,
    conversationId: 'conv-recreated',
  };

  const createAgentMock = vi.mocked(createAgent);
  const createSessionMock = vi.mocked(createSession);
  createAgentMock.mockResolvedValue(recreatedAgentId);
  createSessionMock
    .mockReturnValueOnce(deletedAgentSession as never)
    .mockReturnValueOnce(freshSession as never);

  const bot = new LettaBot({
    workingDir: join(dataDir, 'working'),
    allowedTools: [],
    agentName: 'ContractBot',
  });
  bot.setAgentId(missingAgentId);

  const reply = await bot.sendToAgent('recover me');

  expect(reply).toBe('fresh response');
  expect(deletedAgentSession.close).toHaveBeenCalledTimes(1);
  expect(createAgentMock).toHaveBeenCalledTimes(1);
  expect(createSessionMock).toHaveBeenCalledTimes(2);

  // The first attempt targets the stored ID, the second the newly created one.
  const sessionCalls = createSessionMock.mock.calls;
  expect(sessionCalls[0][0]).toBe(missingAgentId);
  expect(sessionCalls[1][0]).toBe(recreatedAgentId);
});
|
||||
|
||||
it('does not clear agent state on generic initialize failures', async () => {
  const initFailure = new Error('No init message received from subprocess');

  // Stream that only reports a successful result.
  async function* resultOnly() {
    yield { type: 'result', success: true };
  }

  // Session whose initialize() rejects with a generic failure that carries
  // no agent-not-found wording.
  const failingSession = {
    initialize: vi.fn(async () => {
      throw initFailure;
    }),
    send: vi.fn(async (_message: unknown) => undefined),
    stream: vi.fn(() => resultOnly()),
    close: vi.fn(() => undefined),
    agentId: 'agent-contract-test',
    conversationId: 'conv-keep',
  };

  const resumeSessionMock = vi.mocked(resumeSession);
  resumeSessionMock.mockReturnValue(failingSession as never);

  const bot = new LettaBot({
    workingDir: join(dataDir, 'working'),
    allowedTools: [],
  });
  bot.setAgentId('agent-contract-test');

  // Seed a stored conversation ID directly on the bot's store.
  (bot as unknown as { store: { conversationId: string | null } }).store.conversationId = 'conv-keep';

  await expect(bot.sendToAgent('should fail')).rejects.toThrow('No init message received from subprocess');

  // The failed session is closed and nothing is recreated.
  expect(failingSession.close).toHaveBeenCalledTimes(1);
  expect(vi.mocked(createAgent)).not.toHaveBeenCalled();
  expect(vi.mocked(createSession)).not.toHaveBeenCalled();
  expect(resumeSessionMock).toHaveBeenCalledTimes(1);

  // Persisted identifiers are left untouched.
  expect(bot.getStatus().agentId).toBe('agent-contract-test');
  expect(bot.getStatus().conversationId).toBe('conv-keep');
});
|
||||
|
||||
it('invalidates retry session when fallback send fails after conversation-missing error', async () => {
|
||||
const missingConversation = new Error('conversation not found');
|
||||
const retryFailure = new Error('network down');
|
||||
|
||||
@@ -401,6 +401,20 @@ export class Store {
|
||||
return stored !== current;
|
||||
}
|
||||
|
||||
/**
 * Forget the stored agent identity while keeping the agent record itself.
 *
 * Nulls `agentId` and `conversationId`, unsets `conversations` and
 * `baseUrl`, then saves the store. Unlike reset(), which replaces the whole
 * record with `{ agentId: null }`, any other fields on the record are left
 * in place.
 */
clearAgent(): void {
  Object.assign(this.agentData(), {
    agentId: null,
    conversationId: null,
    conversations: undefined,
    baseUrl: undefined,
  });
  this.save();
}
|
||||
|
||||
reset(): void {
|
||||
this.data.agents[this.agentName] = { agentId: null };
|
||||
this.save();
|
||||
|
||||
Reference in New Issue
Block a user