fix: recover from missing/deleted agents during session init (#431)

This commit is contained in:
Cameron
2026-02-27 14:52:40 -08:00
committed by GitHub
parent 0832c8d032
commit 4a23c6d0f7
3 changed files with 142 additions and 1 deletions

View File

@@ -60,6 +60,30 @@ function isConversationMissingError(error: unknown): boolean {
return false;
}
/**
 * Decide whether a session-initialization failure means the agent itself no
 * longer exists.
 *
 * When the CLI subprocess exits before sending its init message, the SDK
 * includes the CLI's stderr in the error message, so one lowercase scan of
 * `error.message` covers both the SDK-level text and the CLI output.
 *
 * Only explicit agent-not-found wording counts. Generic init failures (such
 * as "no init message received") are deliberately NOT matched: they can be
 * transient SDK/process issues, and clearing persisted agent state for them
 * could destroy valid mappings.
 *
 * @param error - Whatever value was thrown during initialization.
 * @returns `true` only for `Error` instances whose message matches one of the
 *   agent-missing patterns; `false` otherwise, including for non-Error values.
 */
function isAgentMissingFromInitError(error: unknown): boolean {
  if (error instanceof Error) {
    // Patterns are written in lowercase, so normalise the message once.
    const haystack = error.message.toLowerCase();
    // The gap between the two anchors is capped at 80 characters and may not
    // cross a '.' or a newline, keeping both halves within one sentence/line.
    const indicators: RegExp[] = [
      /\bagent\b[^.\n]{0,80}\bnot found\b/,
      /\bnot found\b[^.\n]{0,80}\bagent\b/,
      /\bagent\b[^.\n]{0,80}\bdoes not exist\b/,
      /\bunknown agent\b/,
      /\bagent_not_found\b/,
    ];
    for (const indicator of indicators) {
      if (indicator.test(haystack)) {
        return true;
      }
    }
  }
  return false;
}
/**
* Map a structured API error into a clear, user-facing message.
* The `error` object comes from the SDK's new SDKErrorMessage type.
@@ -1005,6 +1029,20 @@ export class LettaBot implements AgentSession {
} catch (error) {
// Close immediately so failed initialization cannot leak a subprocess.
session.close();
// If the stored agent ID doesn't exist on the server (deleted externally,
// ghost agent from failed pairing, etc.), clear the stale ID and retry.
// The retry will hit the "else" branch and create a fresh agent.
// Uses bootstrapRetried to prevent infinite recursion if creation also fails.
if (this.store.agentId && !bootstrapRetried && isAgentMissingFromInitError(error)) {
log.warn(
`Agent ${this.store.agentId} appears missing from server, ` +
`clearing stale agent ID and recreating...`,
);
this.store.clearAgent();
return this._createSessionForKey(key, /* bootstrapRetried */ true, generation);
}
throw error;
}

View File

@@ -12,7 +12,7 @@ vi.mock('@letta-ai/letta-code-sdk', () => ({
}));
vi.mock('../tools/letta-api.js', () => ({
updateAgentName: vi.fn(),
updateAgentName: vi.fn().mockResolvedValue(undefined),
getPendingApprovals: vi.fn(),
rejectApproval: vi.fn(),
cancelRuns: vi.fn(),
@@ -186,6 +186,95 @@ describe('SDK session contract', () => {
expect(mockSession.close).toHaveBeenCalledTimes(1);
});
// Contract: an initialize() failure whose message explicitly says the agent
// was not found should lead to the failed session being closed, a new agent
// being created, and the reply coming from a second session.
it('recreates agent after explicit agent-not-found initialize error', async () => {
// Remove any env-provided agent ID — presumably so only the ID set via
// setAgentId() below is in play; confirm against LettaBot's ID resolution.
delete process.env.LETTA_AGENT_ID;
// First session: initialize() rejects with the generic "no init message"
// text that also embeds an agent-not-found detail in the stderr portion.
const staleSession = {
initialize: vi.fn(async () => {
throw new Error('No init message received from subprocess. stderr: {"detail":"Agent agent-contract-test not found"}');
}),
send: vi.fn(async (_message: unknown) => undefined),
stream: vi.fn(() =>
(async function* () {
yield { type: 'result', success: true };
})()
),
close: vi.fn(() => undefined),
agentId: 'agent-contract-test',
conversationId: 'conv-stale',
};
// Second session: initializes cleanly and streams one assistant message
// followed by a successful result.
const recoveredSession = {
initialize: vi.fn(async () => undefined),
send: vi.fn(async (_message: unknown) => undefined),
stream: vi.fn(() =>
(async function* () {
yield { type: 'assistant', content: 'fresh response' };
yield { type: 'result', success: true };
})()
),
close: vi.fn(() => undefined),
agentId: 'agent-recreated',
conversationId: 'conv-recreated',
};
// createAgent resolves to the replacement agent's ID.
vi.mocked(createAgent).mockResolvedValue('agent-recreated');
// Order matters: the first createSession call yields the failing session,
// the second yields the working one.
vi.mocked(createSession)
.mockReturnValueOnce(staleSession as never)
.mockReturnValueOnce(recoveredSession as never);
const bot = new LettaBot({
workingDir: join(dataDir, 'working'),
allowedTools: [],
agentName: 'ContractBot',
});
// Seed the bot with the agent ID that the first session reports as missing.
bot.setAgentId('agent-contract-test');
const response = await bot.sendToAgent('recover me');
// The reply must be the text streamed by the second (recovered) session.
expect(response).toBe('fresh response');
// The failed session is closed exactly once.
expect(staleSession.close).toHaveBeenCalledTimes(1);
// Exactly one new agent is created and exactly two sessions are opened:
// first with the stale ID, then with the newly created one.
expect(vi.mocked(createAgent)).toHaveBeenCalledTimes(1);
expect(vi.mocked(createSession)).toHaveBeenCalledTimes(2);
expect(vi.mocked(createSession).mock.calls[0][0]).toBe('agent-contract-test');
expect(vi.mocked(createSession).mock.calls[1][0]).toBe('agent-recreated');
});
// Contract: a generic initialize() failure (no agent-not-found wording) must
// propagate to the caller and leave the stored agent and conversation IDs
// untouched.
it('does not clear agent state on generic initialize failures', async () => {
// Generic failure text only — nothing in it mentions a missing agent.
const initFailure = new Error('No init message received from subprocess');
const failingSession = {
initialize: vi.fn(async () => {
throw initFailure;
}),
send: vi.fn(async (_message: unknown) => undefined),
stream: vi.fn(() =>
(async function* () {
yield { type: 'result', success: true };
})()
),
close: vi.fn(() => undefined),
agentId: 'agent-contract-test',
conversationId: 'conv-keep',
};
// Every resumeSession call returns the failing session.
vi.mocked(resumeSession).mockReturnValue(failingSession as never);
const bot = new LettaBot({
workingDir: join(dataDir, 'working'),
allowedTools: [],
});
bot.setAgentId('agent-contract-test');
// The cast gives the test access to `store` so it can seed a conversation
// ID directly. Presumably a stored conversation ID is what routes the bot
// through resumeSession rather than createSession — the assertions below
// expect exactly that split.
const botInternal = bot as unknown as { store: { conversationId: string | null } };
botInternal.store.conversationId = 'conv-keep';
// The original error must reach the caller.
await expect(bot.sendToAgent('should fail')).rejects.toThrow('No init message received from subprocess');
// The failed session is still closed exactly once.
expect(failingSession.close).toHaveBeenCalledTimes(1);
// No recovery path ran: no agent was created and no fresh session opened.
expect(vi.mocked(createAgent)).not.toHaveBeenCalled();
expect(vi.mocked(createSession)).not.toHaveBeenCalled();
expect(vi.mocked(resumeSession)).toHaveBeenCalledTimes(1);
// Persisted identifiers are unchanged after the failure.
expect(bot.getStatus().agentId).toBe('agent-contract-test');
expect(bot.getStatus().conversationId).toBe('conv-keep');
});
it('invalidates retry session when fallback send fails after conversation-missing error', async () => {
const missingConversation = new Error('conversation not found');
const retryFailure = new Error('network down');

View File

@@ -401,6 +401,20 @@ export class Store {
return stored !== current;
}
/**
 * Forget the stored agent identity for the current agent entry.
 *
 * Nulls the agent ID and the single conversation ID, and unsets the
 * per-key conversations collection and the recorded base URL, then persists
 * the change. The next ensureSessionForKey() call will create a fresh agent.
 *
 * Unlike reset(), which replaces the whole entry with `{ agentId: null }`,
 * this mutates the existing entry in place, so any other metadata on it
 * (recovery attempts, etc.) is preserved.
 */
clearAgent(): void {
  const entry = this.agentData();

  // Identity and conversation pointers.
  entry.agentId = null;
  entry.conversationId = null;
  entry.conversations = undefined;

  // The recorded base URL is cleared together with the agent ID.
  entry.baseUrl = undefined;

  this.save();
}
reset(): void {
this.data.agents[this.agentName] = { agentId: null };
this.save();