fix: surface memfs errors in heartbeat with actionable diagnostics (#559)
Co-authored-by: letta-code <248085862+letta-code@users.noreply.github.com> Co-authored-by: Cameron <cameron@pfiffer.org>
This commit is contained in:
committed by
GitHub
parent
0aedc6b4c9
commit
535e5680c3
@@ -1,11 +1,15 @@
|
|||||||
import { describe, expect, it, vi, beforeEach, afterEach } from 'vitest';
|
import { describe, expect, it, vi, beforeEach, afterEach } from 'vitest';
|
||||||
import { writeFileSync, mkdirSync, unlinkSync, rmSync } from 'node:fs';
|
import { writeFileSync, mkdirSync, unlinkSync, rmSync, readFileSync, existsSync } from 'node:fs';
|
||||||
import { resolve } from 'node:path';
|
import { resolve } from 'node:path';
|
||||||
import { tmpdir } from 'node:os';
|
import { tmpdir, homedir } from 'node:os';
|
||||||
|
import { execSync } from 'node:child_process';
|
||||||
import { HeartbeatService, type HeartbeatConfig } from './heartbeat.js';
|
import { HeartbeatService, type HeartbeatConfig } from './heartbeat.js';
|
||||||
import { buildCustomHeartbeatPrompt, SILENT_MODE_PREFIX } from '../core/prompts.js';
|
import { buildCustomHeartbeatPrompt, SILENT_MODE_PREFIX } from '../core/prompts.js';
|
||||||
import type { AgentSession } from '../core/interfaces.js';
|
import type { AgentSession } from '../core/interfaces.js';
|
||||||
import { addTodo } from '../todo/store.js';
|
import { addTodo } from '../todo/store.js';
|
||||||
|
import { getCronLogPath } from '../utils/paths.js';
|
||||||
|
|
||||||
|
const HEARTBEAT_LOG_PATH = getCronLogPath();
|
||||||
|
|
||||||
// ── buildCustomHeartbeatPrompt ──────────────────────────────────────────
|
// ── buildCustomHeartbeatPrompt ──────────────────────────────────────────
|
||||||
|
|
||||||
@@ -271,3 +275,143 @@ describe('HeartbeatService prompt resolution', () => {
|
|||||||
expect(bot.sendToAgent).toHaveBeenCalledTimes(1);
|
expect(bot.sendToAgent).toHaveBeenCalledTimes(1);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// ── Memfs health check ─────────────────────────────────────────────────
|
||||||
|
|
||||||
|
describe('HeartbeatService memfs health check', () => {
  // Per-test scratch locations; HOME is redirected below so that anything
  // resolved via homedir() lands inside tmpDir and is removed with it.
  let tmpDir: string;
  let testHome: string;
  let memDir: string | undefined;
  let originalDataDir: string | undefined;
  let originalHome: string | undefined;

  /** Put an environment variable back to its saved value, unsetting it if it was absent. */
  const restoreEnv = (name: 'DATA_DIR' | 'HOME', saved: string | undefined): void => {
    if (saved === undefined) {
      delete process.env[name];
    } else {
      process.env[name] = saved;
    }
  };

  /** Make the mock bot report the given agent ID with no conversation or channels. */
  const stubStatus = (bot: ReturnType<typeof createMockBot>, agentId: string | null): void => {
    (bot.getStatus as ReturnType<typeof vi.fn>).mockReturnValue({
      agentId,
      conversationId: null,
      channels: [],
    });
  };

  /** Create a real git repo at the agent's memory path containing one untracked file. */
  const seedDirtyRepo = (agentId: string, fileName: string, contents: string): string => {
    const dir = resolve(homedir(), '.letta', 'agents', agentId, 'memory');
    mkdirSync(dir, { recursive: true });
    execSync('git init', { cwd: dir, stdio: 'ignore' });
    writeFileSync(resolve(dir, fileName), contents);
    return dir;
  };

  /** Build a HeartbeatService rooted at tmpDir with memfs switched on or off. */
  const makeService = (bot: ReturnType<typeof createMockBot>, memfs: boolean): HeartbeatService =>
    new HeartbeatService(bot, createConfig({
      workingDir: tmpDir,
      memfs,
    }));

  beforeEach(() => {
    tmpDir = resolve(tmpdir(), `heartbeat-memfs-test-${Date.now()}`);
    testHome = resolve(tmpDir, 'fake-home');
    for (const dir of [tmpDir, testHome]) {
      mkdirSync(dir, { recursive: true });
    }

    // Remember the caller's environment before overriding it.
    originalDataDir = process.env.DATA_DIR;
    originalHome = process.env.HOME;
    process.env.DATA_DIR = tmpDir;
    process.env.HOME = testHome;
  });

  afterEach(() => {
    restoreEnv('DATA_DIR', originalDataDir);
    restoreEnv('HOME', originalHome);
    try {
      rmSync(tmpDir, { recursive: true, force: true });
    } catch {
      /* ignore */
    }
    memDir = undefined;
  });

  it('emits heartbeat_memfs_dirty when memfs directory has untracked files', async () => {
    // A real git repo with one untracked file stands in for the memory directory.
    const agentId = 'agent-memfs-test-' + Date.now();
    memDir = seedDirtyRepo(agentId, 'untracked.md', 'test');

    const bot = createMockBot();
    stubStatus(bot, agentId);
    const service = makeService(bot, true);

    // Reach into the private method so it can be exercised directly.
    const checkMemfsHealth = (service as any).checkMemfsHealth.bind(service);

    expect(() => checkMemfsHealth()).not.toThrow();
    await new Promise((resolvePromise) => setTimeout(resolvePromise, 10));

    let logContents = '';
    if (existsSync(HEARTBEAT_LOG_PATH)) {
      logContents = readFileSync(HEARTBEAT_LOG_PATH, 'utf-8');
    }
    expect(logContents).toContain('heartbeat_memfs_dirty');
    expect(logContents).toContain(agentId);
  });

  it('skips memfs check when memfs is disabled', async () => {
    const bot = createMockBot();
    const service = makeService(bot, false);

    const getMemoryDir = (service as any).getMemoryDir.bind(service);
    expect(getMemoryDir()).toBeNull();
  });

  it('skips memfs check when agent ID is not available', async () => {
    const bot = createMockBot();
    stubStatus(bot, null);
    const service = makeService(bot, true);

    const getMemoryDir = (service as any).getMemoryDir.bind(service);
    expect(getMemoryDir()).toBeNull();
  });

  it('resolves memory directory correctly when memfs is enabled', () => {
    const bot = createMockBot();
    stubStatus(bot, 'agent-abc123');
    const service = makeService(bot, true);

    const getMemoryDir = (service as any).getMemoryDir.bind(service);
    const expectedDir = resolve(homedir(), '.letta', 'agents', 'agent-abc123', 'memory');
    expect(getMemoryDir()).toBe(expectedDir);
  });

  it('still calls sendToAgent even when memfs check finds dirty files', async () => {
    const agentId = 'agent-memfs-dirty-' + Date.now();
    memDir = seedDirtyRepo(agentId, 'dirty.md', 'uncommitted content');

    const bot = createMockBot();
    stubStatus(bot, agentId);
    const service = makeService(bot, true);

    await service.trigger();

    // The memfs check is non-blocking, so the heartbeat must still be sent.
    expect(bot.sendToAgent).toHaveBeenCalledTimes(1);
  });
});
|
||||||
|
|||||||
@@ -7,8 +7,10 @@
|
|||||||
* The agent must use `lettabot-message` CLI via Bash to contact the user.
|
* The agent must use `lettabot-message` CLI via Bash to contact the user.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import { appendFileSync, mkdirSync, readFileSync } from 'node:fs';
|
import { appendFileSync, existsSync, mkdirSync, readFileSync } from 'node:fs';
|
||||||
import { resolve, dirname } from 'node:path';
|
import { resolve, dirname, join } from 'node:path';
|
||||||
|
import { homedir } from 'node:os';
|
||||||
|
import { execFileSync } from 'node:child_process';
|
||||||
import type { AgentSession } from '../core/interfaces.js';
|
import type { AgentSession } from '../core/interfaces.js';
|
||||||
import type { TriggerContext } from '../core/types.js';
|
import type { TriggerContext } from '../core/types.js';
|
||||||
import { buildHeartbeatPrompt, buildCustomHeartbeatPrompt } from '../core/prompts.js';
|
import { buildHeartbeatPrompt, buildCustomHeartbeatPrompt } from '../core/prompts.js';
|
||||||
@@ -49,6 +51,9 @@ export interface HeartbeatConfig {
|
|||||||
workingDir: string;
|
workingDir: string;
|
||||||
agentKey: string;
|
agentKey: string;
|
||||||
|
|
||||||
|
// Whether memfs (git-backed memory filesystem) is enabled for this agent
|
||||||
|
memfs?: boolean;
|
||||||
|
|
||||||
// Custom heartbeat prompt (optional)
|
// Custom heartbeat prompt (optional)
|
||||||
prompt?: string;
|
prompt?: string;
|
||||||
|
|
||||||
@@ -82,6 +87,57 @@ export class HeartbeatService {
|
|||||||
}
|
}
|
||||||
return Math.floor(raw * 60 * 1000);
|
return Math.floor(raw * 60 * 1000);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Work out where this agent's memory directory lives.
 *
 * @returns `<home>/.letta/agents/<agentId>/memory`, or `null` when memfs is
 *   not enabled in the config or the bot status carries no agent ID.
 */
private getMemoryDir(): string | null {
  if (this.config.memfs) {
    // Only consult the bot status once memfs is known to be enabled.
    const { agentId } = this.bot.getStatus();
    if (agentId) {
      return join(homedir(), '.letta', 'agents', agentId, 'memory');
    }
  }
  return null;
}
|
||||||
|
|
||||||
|
/**
 * Check if the memfs git repo has untracked or uncommitted files.
 *
 * Runs `git status --porcelain` in the agent's memory directory with a
 * 5 second timeout. When entries are reported, logs a warning that includes a
 * suggested fix and emits a `heartbeat_memfs_dirty` event carrying the
 * directory, the entry count, and up to 10 entries.
 *
 * Non-fatal: any failure while running git is caught and logged as a warning,
 * so the heartbeat proceeds regardless. Does nothing when getMemoryDir()
 * returns null or the directory does not exist yet.
 */
private checkMemfsHealth(): void {
  const memoryDir = this.getMemoryDir();
  if (!memoryDir) return;

  if (!existsSync(memoryDir)) {
    log.debug(`Memory directory does not exist yet: ${memoryDir}`);
    return;
  }

  try {
    const output = execFileSync('git', ['status', '--porcelain'], {
      cwd: memoryDir,
      encoding: 'utf-8',
      timeout: 5000,
    });

    // Each porcelain entry starts with a two-column status code whose first
    // column may be a space (e.g. " M file"). Trimming the output as a whole
    // would strip that column from the first entry, so split first and drop
    // only empty lines (such as the one after the trailing newline).
    const lines = output.split('\n').filter((line) => line.length > 0);

    if (lines.length > 0) {
      log.warn(
        `Memory directory has ${lines.length} uncommitted/untracked file(s). ` +
        `This may cause heartbeat failures. Run "cd ${memoryDir} && git add -A && git commit -m 'sync'" to fix. ` +
        `Files: ${lines.slice(0, 5).join(', ')}${lines.length > 5 ? ` (and ${lines.length - 5} more)` : ''}`,
      );
      logEvent('heartbeat_memfs_dirty', {
        memoryDir,
        fileCount: lines.length,
        files: lines.slice(0, 10),
      });
    }
  } catch (err) {
    // Best-effort diagnostic only: never let a git failure block the heartbeat.
    log.warn(
      `Failed to check memfs health in ${memoryDir}: ${err instanceof Error ? err.message : String(err)}`,
    );
  }
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Start the heartbeat timer
|
* Start the heartbeat timer
|
||||||
@@ -168,6 +224,9 @@ export class HeartbeatService {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Pre-flight: check for dirty memfs state that could cause session init failures
|
||||||
|
this.checkMemfsHealth();
|
||||||
|
|
||||||
log.info(`Sending heartbeat to agent...`);
|
log.info(`Sending heartbeat to agent...`);
|
||||||
|
|
||||||
logEvent('heartbeat_running', {
|
logEvent('heartbeat_running', {
|
||||||
@@ -226,10 +285,22 @@ export class HeartbeatService {
|
|||||||
});
|
});
|
||||||
|
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
const errorMsg = error instanceof Error ? error.message : String(error);
|
||||||
log.error('Error:', error);
|
log.error('Error:', error);
|
||||||
logEvent('heartbeat_error', {
|
|
||||||
error: error instanceof Error ? error.message : String(error),
|
// Surface git/memfs-related errors with actionable diagnostics
|
||||||
});
|
if (/\b(git|memfs|memory)\b/i.test(errorMsg)) {
|
||||||
|
const memoryDir = this.getMemoryDir();
|
||||||
|
log.warn(
|
||||||
|
`Heartbeat failed due to a git/memfs error. ` +
|
||||||
|
`This often happens when the memory directory has untracked or uncommitted files. ` +
|
||||||
|
(memoryDir
|
||||||
|
? `Check: cd ${memoryDir} && git status`
|
||||||
|
: `Enable memfs or check LETTA_AGENT_ID to diagnose.`),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
logEvent('heartbeat_error', { error: errorMsg });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -455,6 +455,7 @@ async function main() {
|
|||||||
intervalMinutes: heartbeatConfig?.intervalMin ?? 240,
|
intervalMinutes: heartbeatConfig?.intervalMin ?? 240,
|
||||||
skipRecentUserMinutes: heartbeatConfig?.skipRecentUserMin ?? globalConfig.heartbeatSkipRecentUserMin,
|
skipRecentUserMinutes: heartbeatConfig?.skipRecentUserMin ?? globalConfig.heartbeatSkipRecentUserMin,
|
||||||
agentKey: agentConfig.name,
|
agentKey: agentConfig.name,
|
||||||
|
memfs: resolvedMemfs,
|
||||||
prompt: heartbeatConfig?.prompt || process.env.HEARTBEAT_PROMPT,
|
prompt: heartbeatConfig?.prompt || process.env.HEARTBEAT_PROMPT,
|
||||||
promptFile: heartbeatConfig?.promptFile,
|
promptFile: heartbeatConfig?.promptFile,
|
||||||
workingDir: resolvedWorkingDir,
|
workingDir: resolvedWorkingDir,
|
||||||
|
|||||||
Reference in New Issue
Block a user