fix: surface memfs errors in heartbeat with actionable diagnostics (#559)
Co-authored-by: letta-code <248085862+letta-code@users.noreply.github.com> Co-authored-by: Cameron <cameron@pfiffer.org>
This commit is contained in:
committed by
GitHub
parent
0aedc6b4c9
commit
535e5680c3
@@ -1,11 +1,15 @@
|
||||
import { describe, expect, it, vi, beforeEach, afterEach } from 'vitest';
|
||||
import { writeFileSync, mkdirSync, unlinkSync, rmSync } from 'node:fs';
|
||||
import { writeFileSync, mkdirSync, unlinkSync, rmSync, readFileSync, existsSync } from 'node:fs';
|
||||
import { resolve } from 'node:path';
|
||||
import { tmpdir } from 'node:os';
|
||||
import { tmpdir, homedir } from 'node:os';
|
||||
import { execSync } from 'node:child_process';
|
||||
import { HeartbeatService, type HeartbeatConfig } from './heartbeat.js';
|
||||
import { buildCustomHeartbeatPrompt, SILENT_MODE_PREFIX } from '../core/prompts.js';
|
||||
import type { AgentSession } from '../core/interfaces.js';
|
||||
import { addTodo } from '../todo/store.js';
|
||||
import { getCronLogPath } from '../utils/paths.js';
|
||||
|
||||
const HEARTBEAT_LOG_PATH = getCronLogPath();
|
||||
|
||||
// ── buildCustomHeartbeatPrompt ──────────────────────────────────────────
|
||||
|
||||
@@ -271,3 +275,143 @@ describe('HeartbeatService prompt resolution', () => {
|
||||
expect(bot.sendToAgent).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
});
|
||||
|
||||
// ── Memfs health check ─────────────────────────────────────────────────
|
||||
|
||||
describe('HeartbeatService memfs health check', () => {
  type MockBot = ReturnType<typeof createMockBot>;

  // Per-test scratch state. HOME and DATA_DIR are redirected into tmpDir so
  // that paths built from homedir() land in a throwaway location.
  let tmpDir: string;
  let testHome: string;
  let memDir: string | undefined;
  let originalDataDir: string | undefined;
  let originalHome: string | undefined;

  /** Put an environment variable back to the value captured before the test. */
  const restoreEnv = (name: 'DATA_DIR' | 'HOME', saved: string | undefined): void => {
    if (saved !== undefined) {
      process.env[name] = saved;
      return;
    }
    delete process.env[name];
  };

  /** Make the mocked bot report the given agent ID from getStatus(). */
  const stubAgentId = (bot: MockBot, agentId: string | null): void => {
    (bot.getStatus as ReturnType<typeof vi.fn>).mockReturnValue({
      agentId,
      conversationId: null,
      channels: [],
    });
  };

  /** Build a service whose working directory is the per-test temp dir. */
  const makeService = (bot: MockBot, memfs: boolean): HeartbeatService =>
    new HeartbeatService(bot, createConfig({ workingDir: tmpDir, memfs }));

  /**
   * Create a fresh git repo at the agent's memory path (under the redirected
   * home directory) containing a single untracked file, and return its path.
   */
  const seedDirtyMemoryRepo = (agentId: string, fileName: string, contents: string): string => {
    const dir = resolve(homedir(), '.letta', 'agents', agentId, 'memory');
    mkdirSync(dir, { recursive: true });
    execSync('git init', { cwd: dir, stdio: 'ignore' });
    writeFileSync(resolve(dir, fileName), contents);
    return dir;
  };

  beforeEach(() => {
    tmpDir = resolve(tmpdir(), `heartbeat-memfs-test-${Date.now()}`);
    testHome = resolve(tmpDir, 'fake-home');
    mkdirSync(tmpDir, { recursive: true });
    mkdirSync(testHome, { recursive: true });

    // Remember the real values before overriding them for the test.
    originalDataDir = process.env.DATA_DIR;
    originalHome = process.env.HOME;
    process.env.DATA_DIR = tmpDir;
    process.env.HOME = testHome;
  });

  afterEach(() => {
    restoreEnv('DATA_DIR', originalDataDir);
    restoreEnv('HOME', originalHome);
    try {
      rmSync(tmpDir, { recursive: true, force: true });
    } catch {
      /* ignore */
    }
    memDir = undefined;
  });

  it('emits heartbeat_memfs_dirty when memfs directory has untracked files', async () => {
    // A real git repo with one untracked file stands in for the memory directory.
    const agentId = 'agent-memfs-test-' + Date.now();
    memDir = seedDirtyMemoryRepo(agentId, 'untracked.md', 'test');

    const bot = createMockBot();
    stubAgentId(bot, agentId);
    const service = makeService(bot, true);

    // Bracket access reaches the private method directly.
    expect(() => service['checkMemfsHealth']()).not.toThrow();

    // Short pause before inspecting the log file.
    await new Promise((done) => setTimeout(done, 10));

    let logContents = '';
    if (existsSync(HEARTBEAT_LOG_PATH)) {
      logContents = readFileSync(HEARTBEAT_LOG_PATH, 'utf-8');
    }
    expect(logContents).toContain('heartbeat_memfs_dirty');
    expect(logContents).toContain(agentId);
  });

  it('skips memfs check when memfs is disabled', async () => {
    const service = makeService(createMockBot(), false);

    expect(service['getMemoryDir']()).toBeNull();
  });

  it('skips memfs check when agent ID is not available', async () => {
    const bot = createMockBot();
    stubAgentId(bot, null);
    const service = makeService(bot, true);

    expect(service['getMemoryDir']()).toBeNull();
  });

  it('resolves memory directory correctly when memfs is enabled', () => {
    const bot = createMockBot();
    stubAgentId(bot, 'agent-abc123');
    const service = makeService(bot, true);

    const expectedDir = resolve(homedir(), '.letta', 'agents', 'agent-abc123', 'memory');
    expect(service['getMemoryDir']()).toBe(expectedDir);
  });

  it('still calls sendToAgent even when memfs check finds dirty files', async () => {
    const agentId = 'agent-memfs-dirty-' + Date.now();
    memDir = seedDirtyMemoryRepo(agentId, 'dirty.md', 'uncommitted content');

    const bot = createMockBot();
    stubAgentId(bot, agentId);
    const service = makeService(bot, true);

    await service.trigger();

    // The memfs check is non-blocking, so the heartbeat must still be sent.
    expect(bot.sendToAgent).toHaveBeenCalledTimes(1);
  });
});
|
||||
|
||||
@@ -7,8 +7,10 @@
|
||||
* The agent must use `lettabot-message` CLI via Bash to contact the user.
|
||||
*/
|
||||
|
||||
import { appendFileSync, mkdirSync, readFileSync } from 'node:fs';
|
||||
import { resolve, dirname } from 'node:path';
|
||||
import { appendFileSync, existsSync, mkdirSync, readFileSync } from 'node:fs';
|
||||
import { resolve, dirname, join } from 'node:path';
|
||||
import { homedir } from 'node:os';
|
||||
import { execFileSync } from 'node:child_process';
|
||||
import type { AgentSession } from '../core/interfaces.js';
|
||||
import type { TriggerContext } from '../core/types.js';
|
||||
import { buildHeartbeatPrompt, buildCustomHeartbeatPrompt } from '../core/prompts.js';
|
||||
@@ -49,6 +51,9 @@ export interface HeartbeatConfig {
|
||||
workingDir: string;
|
||||
agentKey: string;
|
||||
|
||||
// Whether memfs (git-backed memory filesystem) is enabled for this agent
|
||||
memfs?: boolean;
|
||||
|
||||
// Custom heartbeat prompt (optional)
|
||||
prompt?: string;
|
||||
|
||||
@@ -82,6 +87,57 @@ export class HeartbeatService {
|
||||
}
|
||||
return Math.floor(raw * 60 * 1000);
|
||||
}
|
||||
|
||||
/**
 * Locate this agent's memfs directory under the current user's home:
 * `<home>/.letta/agents/<agentId>/memory`.
 *
 * @returns The directory path, or `null` when `config.memfs` is falsy or the
 *          bot status carries no agent ID.
 */
private getMemoryDir(): string | null {
  if (this.config.memfs) {
    // Only query the bot once we know memfs is enabled.
    const { agentId } = this.bot.getStatus();
    if (agentId) {
      return join(homedir(), '.letta', 'agents', agentId, 'memory');
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Check whether the memfs git repo has untracked or uncommitted files.
 *
 * Runs `git status --porcelain` in the memory directory. When it reports any
 * entries, logs a warning with a suggested fix and emits a
 * `heartbeat_memfs_dirty` event carrying the directory, the entry count and
 * up to ten entries. Does nothing when no memory directory is resolved, and
 * only logs at debug level when the directory does not exist yet.
 *
 * Non-fatal: a failing or timed-out git invocation is caught and logged as a
 * warning rather than propagated.
 */
private checkMemfsHealth(): void {
  const memoryDir = this.getMemoryDir();
  if (!memoryDir) return;

  if (!existsSync(memoryDir)) {
    log.debug(`Memory directory does not exist yet: ${memoryDir}`);
    return;
  }

  try {
    const output = execFileSync('git', ['status', '--porcelain'], {
      cwd: memoryDir,
      encoding: 'utf-8',
      timeout: 5000,
    });

    // Each porcelain entry is "XY <path>", and X may legitimately be a space
    // (e.g. " M file" for an unstaged modification). Trimming the whole output
    // would strip that leading space from the first entry and misreport its
    // status, so split first and only discard blank lines.
    const lines = output.split('\n').filter((line) => line.trim() !== '');
    if (lines.length === 0) return;

    log.warn(
      `Memory directory has ${lines.length} uncommitted/untracked file(s). ` +
        `This may cause heartbeat failures. Run "cd ${memoryDir} && git add -A && git commit -m 'sync'" to fix. ` +
        `Files: ${lines.slice(0, 5).join(', ')}${lines.length > 5 ? ` (and ${lines.length - 5} more)` : ''}`,
    );
    logEvent('heartbeat_memfs_dirty', {
      memoryDir,
      fileCount: lines.length,
      files: lines.slice(0, 10),
    });
  } catch (err) {
    log.warn(
      `Failed to check memfs health in ${memoryDir}: ${err instanceof Error ? err.message : String(err)}`,
    );
  }
}
|
||||
|
||||
/**
|
||||
* Start the heartbeat timer
|
||||
@@ -168,6 +224,9 @@ export class HeartbeatService {
|
||||
}
|
||||
}
|
||||
|
||||
// Pre-flight: check for dirty memfs state that could cause session init failures
|
||||
this.checkMemfsHealth();
|
||||
|
||||
log.info(`Sending heartbeat to agent...`);
|
||||
|
||||
logEvent('heartbeat_running', {
|
||||
@@ -226,10 +285,22 @@ export class HeartbeatService {
|
||||
});
|
||||
|
||||
} catch (error) {
|
||||
const errorMsg = error instanceof Error ? error.message : String(error);
|
||||
log.error('Error:', error);
|
||||
logEvent('heartbeat_error', {
|
||||
error: error instanceof Error ? error.message : String(error),
|
||||
});
|
||||
|
||||
// Surface git/memfs-related errors with actionable diagnostics
|
||||
if (/\b(git|memfs|memory)\b/i.test(errorMsg)) {
|
||||
const memoryDir = this.getMemoryDir();
|
||||
log.warn(
|
||||
`Heartbeat failed due to a git/memfs error. ` +
|
||||
`This often happens when the memory directory has untracked or uncommitted files. ` +
|
||||
(memoryDir
|
||||
? `Check: cd ${memoryDir} && git status`
|
||||
: `Enable memfs or check LETTA_AGENT_ID to diagnose.`),
|
||||
);
|
||||
}
|
||||
|
||||
logEvent('heartbeat_error', { error: errorMsg });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -455,6 +455,7 @@ async function main() {
|
||||
intervalMinutes: heartbeatConfig?.intervalMin ?? 240,
|
||||
skipRecentUserMinutes: heartbeatConfig?.skipRecentUserMin ?? globalConfig.heartbeatSkipRecentUserMin,
|
||||
agentKey: agentConfig.name,
|
||||
memfs: resolvedMemfs,
|
||||
prompt: heartbeatConfig?.prompt || process.env.HEARTBEAT_PROMPT,
|
||||
promptFile: heartbeatConfig?.promptFile,
|
||||
workingDir: resolvedWorkingDir,
|
||||
|
||||
Reference in New Issue
Block a user