feat: default agents and subagents to auto model (#1392)

Co-authored-by: Letta Code <noreply@letta.com>
Co-authored-by: Ari Webb <ari@letta.com>
This commit is contained in:
Sarah Wooders
2026-03-16 16:50:01 -07:00
committed by GitHub
parent 0aeb24b51d
commit c2a1312811
10 changed files with 442 additions and 73 deletions

View File

@@ -6,7 +6,7 @@ import type {
AgentState,
AgentType,
} from "@letta-ai/letta-client/resources/agents/agents";
import { DEFAULT_AGENT_NAME } from "../constants";
import { DEFAULT_AGENT_NAME, DEFAULT_SUMMARIZATION_MODEL } from "../constants";
import { settingsManager } from "../settings-manager";
import { getModelContextWindow } from "./available-models";
import { getClient, getServerUrl } from "./client";
@@ -221,44 +221,8 @@ export async function createAgent(
// Only attach server-side tools to the agent.
// Client-side tools (Read, Write, Bash, etc.) are passed via client_tools at runtime,
// NOT attached to the agent. This is the new pattern - no more stub tool registration.
const { isOpenAIModel } = await import("../tools/manager");
const baseMemoryTool = isOpenAIModel(modelHandle)
? "memory_apply_patch"
: "memory";
const defaultBaseTools = options.baseTools ?? [
baseMemoryTool,
"web_search",
"fetch_webpage",
];
let toolNames = [...defaultBaseTools];
// Fallback: if server doesn't have memory_apply_patch, use legacy memory tool
if (toolNames.includes("memory_apply_patch")) {
try {
const resp = await client.tools.list({ name: "memory_apply_patch" });
const hasMemoryApplyPatch =
Array.isArray(resp.items) && resp.items.length > 0;
if (!hasMemoryApplyPatch) {
console.warn(
"memory_apply_patch tool not found on server; falling back to 'memory' tool",
);
toolNames = toolNames.map((n) =>
n === "memory_apply_patch" ? "memory" : n,
);
}
} catch (err) {
// If the capability check fails for any reason, conservatively fall back to 'memory'
console.warn(
`Unable to verify memory_apply_patch availability (falling back to 'memory'): ${
err instanceof Error ? err.message : String(err)
}`,
);
toolNames = toolNames.map((n) =>
n === "memory_apply_patch" ? "memory" : n,
);
}
}
const defaultBaseTools = options.baseTools ?? ["web_search", "fetch_webpage"];
const toolNames = [...defaultBaseTools];
// Determine which memory blocks to use:
// 1. If options.memoryBlocks is provided, use those (custom blocks and/or block references)
@@ -403,6 +367,9 @@ export async function createAgent(
initial_message_sequence: [],
parallel_tool_calls: parallelToolCallsVal,
enable_sleeptime: enableSleeptimeVal,
compaction_settings: {
model: DEFAULT_SUMMARIZATION_MODEL,
},
};
const createWithTools = (tools: string[]) =>
@@ -417,8 +384,6 @@ export async function createAgent(
addBaseToolsToServer,
);
// Note: Preflight check above falls back to 'memory' when 'memory_apply_patch' is unavailable.
// Apply updateArgs if provided (e.g., context_window, reasoning_effort, verbosity, etc.).
// Also apply tier defaults from models.json when the caller explicitly selected a model.
//

View File

@@ -77,6 +77,10 @@ export function resolveModel(modelIdentifier: string): string | null {
* Get the default model handle
*/
export function getDefaultModel(): string {
// Prefer Auto when available in models.json.
const autoModel = resolveModel("auto");
if (autoModel) return autoModel;
const defaultModel = models.find((m) => m.isDefault);
if (defaultModel) return defaultModel.handle;
@@ -89,17 +93,12 @@ export function getDefaultModel(): string {
/**
* Get the default model handle based on billing tier.
* Free tier users get GLM-5, everyone else gets the standard default.
* All tiers use the same default selection path.
* @param billingTier - The user's billing tier (e.g., "free", "pro", "enterprise")
* @returns The model handle to use as default
*/
export function getDefaultModelForTier(billingTier?: string | null): string {
// Free tier gets GLM-5.
if (billingTier?.toLowerCase() === "free") {
const freeDefault = models.find((m) => m.id === "glm-5");
if (freeDefault) return freeDefault.handle;
}
// Everyone else (pro, enterprise, unknown) gets the standard default
void billingTier;
return getDefaultModel();
}

View File

@@ -0,0 +1,194 @@
import type {
AgentState,
AgentUpdateParams,
} from "@letta-ai/letta-client/resources/agents/agents";
import type { Tool } from "@letta-ai/letta-client/resources/tools";
import { DEFAULT_SUMMARIZATION_MODEL } from "../constants";
/**
 * Server-side base tools that should be attached to every agent by default.
 * Client-side tools are supplied at runtime and intentionally excluded.
 */
export const DEFAULT_ATTACHED_BASE_TOOLS = [
  "web_search",
  "fetch_webpage",
] as const;
/**
 * Narrow structural view of the Letta client exposing only the operations
 * this module needs (agent update + tool listing), so tests can pass mocks.
 */
type AgentStateReconcileClient = {
  agents: {
    // Applies a partial update to an agent and returns the updated state.
    update: (agentID: string, body: AgentUpdateParams) => Promise<AgentState>;
  };
  tools: {
    // Lists server-side tools, optionally filtered by name.
    list: (query?: { name?: string | null; limit?: number | null }) => Promise<{
      items: Tool[];
    }>;
  };
};
/** Outcome of reconciling an existing agent's state with current defaults. */
export interface ReconcileAgentStateResult {
  // True when an update call was issued to the server.
  updated: boolean;
  // Latest agent state; the server's updated copy when `updated` is true.
  agent: AgentState;
  // Labels of tweaks that were applied (e.g. "set_compaction_model").
  appliedTweaks: string[];
  // Labels of tweaks that were skipped, with the reason encoded in the label.
  skippedTweaks: string[];
}
/**
 * Compare two collections of tool IDs for set equality (order- and
 * duplicate-insensitive).
 *
 * The previous implementation compared raw array lengths before checking
 * membership, which misreported equality when either side contained
 * duplicate IDs: e.g. current ["a","b"] vs desired ["a","a"] passed the
 * length check and every desired ID was present, yielding a false "equal".
 * Deduplicating both sides into Sets before comparing sizes and membership
 * fixes that asymmetry.
 */
function areToolSetsEqual(
  currentToolIds: string[],
  desiredToolIds: string[],
): boolean {
  const currentSet = new Set(currentToolIds);
  const desiredSet = new Set(desiredToolIds);
  if (currentSet.size !== desiredSet.size) {
    return false;
  }
  for (const toolId of desiredSet) {
    if (!currentSet.has(toolId)) {
      return false;
    }
  }
  return true;
}
/** Normalized tool name: trimmed string, or "" when no string name is set. */
function getToolName(tool: Tool): string {
  const rawName = tool.name;
  return typeof rawName === "string" ? rawName.trim() : "";
}
function getAttachedToolIdsByName(agent: AgentState): Map<string, string> {
const toolIdsByName = new Map<string, string>();
for (const tool of agent.tools ?? []) {
const name = getToolName(tool);
if (!name || !tool.id || toolIdsByName.has(name)) {
continue;
}
toolIdsByName.set(name, tool.id);
}
return toolIdsByName;
}
/**
 * Look up a tool's ID on the server by name. Prefers an exact (trimmed)
 * name match among the returned candidates, otherwise falls back to the
 * first result. Returns null when the server has no candidates or the
 * chosen candidate has no id.
 */
async function resolveToolIdByName(
  client: AgentStateReconcileClient,
  toolName: string,
): Promise<string | null> {
  const { items } = await client.tools.list({ name: toolName, limit: 10 });
  if (!Array.isArray(items) || items.length === 0) {
    return null;
  }
  const exact = items.find((candidate) => getToolName(candidate) === toolName);
  const chosen = exact ?? items[0];
  return chosen?.id ?? null;
}
/**
 * Resolve the desired base tool names to concrete tool IDs.
 *
 * Resolution order per name: prefer the ID of an already-attached tool with
 * that name, otherwise query the server. All names are resolved in parallel.
 * If any name cannot be resolved, returns { toolIds: null } plus the list of
 * missing names so the caller can skip the tool-sync tweak entirely.
 *
 * NOTE(review): missingToolNames accumulates in Promise.all resolution
 * order, which may differ from desiredToolNames order — callers only join
 * it for a log label, so this appears intentional.
 */
async function resolveDesiredAttachedToolIds(
  client: AgentStateReconcileClient,
  agent: AgentState,
  desiredToolNames: readonly string[],
): Promise<{ toolIds: string[] | null; missingToolNames: string[] }> {
  const attachedByName = getAttachedToolIdsByName(agent);
  const resolvedByName = new Map<string, string>();
  const missingToolNames: string[] = [];
  await Promise.all(
    desiredToolNames.map(async (toolName) => {
      // Already attached: reuse the known ID, no server round-trip.
      const existingId = attachedByName.get(toolName);
      if (existingId) {
        resolvedByName.set(toolName, existingId);
        return;
      }
      try {
        const resolvedId = await resolveToolIdByName(client, toolName);
        if (resolvedId) {
          resolvedByName.set(toolName, resolvedId);
          return;
        }
      } catch {
        // Treat as missing; caller decides whether to skip this tweak.
      }
      missingToolNames.push(toolName);
    }),
  );
  if (missingToolNames.length > 0) {
    return {
      toolIds: null,
      missingToolNames,
    };
  }
  // Preserve the caller's desired ordering when emitting IDs.
  const toolIds = desiredToolNames
    .map((toolName) => resolvedByName.get(toolName))
    .filter((toolId): toolId is string => Boolean(toolId));
  return {
    toolIds,
    missingToolNames: [],
  };
}
/**
 * Bring an existing agent's server-side state in line with current defaults.
 *
 * Two tweaks are considered:
 *  1. "set_compaction_model" — if no compaction model is configured (missing,
 *     non-string, or blank), set it to DEFAULT_SUMMARIZATION_MODEL while
 *     preserving any other compaction settings (e.g. mode).
 *  2. "sync_attached_tools" — replace the agent's attached tools with exactly
 *     DEFAULT_ATTACHED_BASE_TOOLS, but only when every desired tool can be
 *     resolved to an ID; otherwise the tweak is recorded in skippedTweaks.
 *
 * Issues a single agents.update call only when at least one tweak applies;
 * otherwise returns the original agent with updated=false.
 */
export async function reconcileExistingAgentState(
  client: AgentStateReconcileClient,
  agent: AgentState,
): Promise<ReconcileAgentStateResult> {
  const patch: AgentUpdateParams = {};
  const appliedTweaks: string[] = [];
  const skippedTweaks: string[] = [];
  // Blank or non-string model counts as "not configured".
  const configuredCompactionModel =
    typeof agent.compaction_settings?.model === "string"
      ? agent.compaction_settings.model.trim()
      : "";
  if (!configuredCompactionModel) {
    // Spread existing settings so fields like `mode` are preserved.
    patch.compaction_settings = {
      ...(agent.compaction_settings ?? {}),
      model: DEFAULT_SUMMARIZATION_MODEL,
    };
    appliedTweaks.push("set_compaction_model");
  }
  const desiredToolNames = DEFAULT_ATTACHED_BASE_TOOLS;
  const desiredTools = await resolveDesiredAttachedToolIds(
    client,
    agent,
    desiredToolNames,
  );
  if (desiredTools.missingToolNames.length > 0 || !desiredTools.toolIds) {
    // Can't safely sync tools when any default tool is unresolvable;
    // record the skip with the missing names for diagnostics.
    skippedTweaks.push(
      `sync_attached_tools_missing:${desiredTools.missingToolNames.join(",")}`,
    );
  } else {
    const currentToolIds = (agent.tools ?? [])
      .map((tool) => tool.id)
      .filter((toolId): toolId is string => Boolean(toolId));
    // Only patch tool_ids when the attached set actually differs.
    if (!areToolSetsEqual(currentToolIds, desiredTools.toolIds)) {
      patch.tool_ids = desiredTools.toolIds;
      appliedTweaks.push("sync_attached_tools");
    }
  }
  if (appliedTweaks.length === 0) {
    // Nothing to change — avoid a no-op update round-trip.
    return {
      updated: false,
      agent,
      appliedTweaks,
      skippedTweaks,
    };
  }
  const updatedAgent = await client.agents.update(agent.id, patch);
  return {
    updated: true,
    agent: updatedAgent,
    appliedTweaks,
    skippedTweaks,
  };
}

View File

@@ -157,6 +157,7 @@ export async function resolveSubagentModel(options: {
}): Promise<string | null> {
const { userModel, recommendedModel, parentModelHandle, billingTier } =
options;
const isFreeTier = billingTier?.toLowerCase() === "free";
if (userModel) return userModel;
@@ -165,12 +166,6 @@ export async function resolveSubagentModel(options: {
recommendedHandle = resolveModel(recommendedModel);
}
// Free-tier users should default subagents to GLM-5 instead of provider-specific
// recommendations like Sonnet.
if (recommendedModel !== "inherit" && billingTier?.toLowerCase() === "free") {
recommendedHandle = getDefaultModelForTier(billingTier);
}
let availableHandles: Set<string> | null = options.availableHandles ?? null;
const isAvailable = async (handle: string): Promise<boolean> => {
try {
@@ -184,6 +179,20 @@ export async function resolveSubagentModel(options: {
}
};
// Free-tier default for subagents: auto-fast, when available.
const freeTierDefaultHandle = isFreeTier ? resolveModel("auto-fast") : null;
if (freeTierDefaultHandle && (await isAvailable(freeTierDefaultHandle))) {
return freeTierDefaultHandle;
}
// Free-tier fallback default: auto, when available.
if (isFreeTier) {
const defaultHandle = getDefaultModelForTier(billingTier);
if (defaultHandle && (await isAvailable(defaultHandle))) {
return defaultHandle;
}
}
if (parentModelHandle) {
const parentProvider = getProviderPrefix(parentModelHandle);
const parentBaseProvider = parentProvider
@@ -224,6 +233,12 @@ export async function resolveSubagentModel(options: {
return recommendedHandle;
}
// Non-free fallback default: auto, when available.
const defaultHandle = getDefaultModelForTier(billingTier);
if (defaultHandle && (await isAvailable(defaultHandle))) {
return defaultHandle;
}
return recommendedHandle;
}

View File

@@ -67,9 +67,11 @@ import {
INTERRUPT_RECOVERY_ALERT,
shouldRecommendDefaultPrompt,
} from "../agent/promptAssets";
import { reconcileExistingAgentState } from "../agent/reconcileExistingAgentState";
import { recordSessionEnd } from "../agent/sessionHistory";
import { SessionStats } from "../agent/stats";
import {
DEFAULT_SUMMARIZATION_MODEL,
INTERRUPTED_BY_USER,
MEMFS_CONFLICT_CHECK_INTERVAL,
SYSTEM_ALERT_CLOSE,
@@ -3146,11 +3148,14 @@ export default function App({
// Fetch llmConfig when agent is ready
useEffect(() => {
if (loadingState === "ready" && agentId && agentId !== "loading") {
let cancelled = false;
const fetchConfig = async () => {
try {
const { getClient } = await import("../agent/client");
const client = await getClient();
const agent = await client.agents.retrieve(agentId);
setAgentState(agent);
setLlmConfig(agent.llm_config);
setAgentDescription(agent.description ?? null);
@@ -3212,8 +3217,11 @@ export default function App({
setCurrentSystemPromptId("custom");
}
// Get last message timestamp from agent state if available
const lastRunCompletion = (agent as { last_run_completion?: string })
.last_run_completion;
const lastRunCompletion = (
agent as {
last_run_completion?: string;
}
).last_run_completion;
setAgentLastRunAt(lastRunCompletion ?? null);
// Derive model ID from llm_config for ModelSelector
@@ -3259,11 +3267,38 @@ export default function App({
await forceToolsetSwitch(persistedToolsetPreference, agentId);
setCurrentToolset(persistedToolsetPreference);
}
void reconcileExistingAgentState(client, agent)
.then((reconcileResult) => {
if (!reconcileResult.updated || cancelled) {
return;
}
if (agentIdRef.current !== agent.id) {
return;
}
setAgentState(reconcileResult.agent);
setAgentDescription(reconcileResult.agent.description ?? null);
})
.catch((reconcileError) => {
debugWarn(
"agent-config",
`Failed to reconcile existing agent settings for ${agentId}: ${
reconcileError instanceof Error
? reconcileError.message
: String(reconcileError)
}`,
);
});
} catch (error) {
debugLog("agent-config", "Error fetching agent config: %O", error);
}
};
fetchConfig();
return () => {
cancelled = true;
};
}
}, [loadingState, agentId]);
@@ -8294,6 +8329,9 @@ export default function App({
? {
compaction_settings: {
mode: modeArg,
model:
agentStateRef.current?.compaction_settings?.model?.trim() ||
DEFAULT_SUMMARIZATION_MODEL,
},
}
: undefined;
@@ -11785,14 +11823,15 @@ ${SYSTEM_REMINDER_CLOSE}
try {
const client = await getClient();
// Spread existing compaction_settings to preserve model/other fields,
// only override the mode. If no existing settings, use empty model
// string which tells the backend to use its default lightweight model.
// only override the mode. If no model is configured, default to
// letta/auto so compaction uses a consistent summarization model.
const existing = agentState?.compaction_settings;
const existingModel = existing?.model?.trim();
await client.agents.update(agentId, {
compaction_settings: {
model: existing?.model ?? "",
...existing,
model: existingModel || DEFAULT_SUMMARIZATION_MODEL,
mode: mode as
| "all"
| "sliding_window"

View File

@@ -7,6 +7,11 @@
*/
export const DEFAULT_MODEL_ID = "sonnet";
/**
* Default model handle to use for conversation compaction / summarization.
*/
export const DEFAULT_SUMMARIZATION_MODEL = "letta/auto";
/**
* Default agent name when creating a new agent
*/

View File

@@ -3,7 +3,7 @@
{
"id": "auto",
"handle": "letta/auto",
"label": "Auto (Beta)",
"label": "Auto",
"description": "Automatically select the best model",
"isFeatured": true,
"free": true
@@ -11,7 +11,7 @@
{
"id": "auto-fast",
"handle": "letta/auto-fast",
"label": "Auto Fast (Beta)",
"label": "Auto Fast",
"description": "Automatically select the best fast model",
"isFeatured": true,
"free": true

View File

@@ -3,12 +3,12 @@ import { describe, expect, test } from "bun:test";
import { getDefaultModel, getDefaultModelForTier } from "../../agent/model";
describe("getDefaultModelForTier", () => {
test("returns GLM-5 for free tier", () => {
expect(getDefaultModelForTier("free")).toBe("zai/glm-5");
test("returns the default model for free tier", () => {
expect(getDefaultModelForTier("free")).toBe(getDefaultModel());
});
test("is case-insensitive for free tier", () => {
expect(getDefaultModelForTier("FrEe")).toBe("zai/glm-5");
expect(getDefaultModelForTier("FrEe")).toBe(getDefaultModel());
});
test("returns standard default for non-free tiers", () => {

View File

@@ -0,0 +1,128 @@
import { describe, expect, mock, test } from "bun:test";
import type {
AgentState,
AgentUpdateParams,
} from "@letta-ai/letta-client/resources/agents/agents";
import type { Tool } from "@letta-ai/letta-client/resources/tools";
import {
DEFAULT_ATTACHED_BASE_TOOLS,
reconcileExistingAgentState,
} from "../../agent/reconcileExistingAgentState";
/** Minimal Tool stub for tests; only `id` and `name` are consulted. */
function mkTool(id: string, name: string): Tool {
  const stub = { id, name };
  return stub as Tool;
}
/** Build an AgentState fixture with sensible defaults, overridable per test. */
function mkAgentState(overrides: Partial<AgentState>): AgentState {
  const defaults = {
    id: "agent-test",
    tools: [],
    name: "test-agent",
    system: "system",
    agent_type: "letta_v1_agent",
    blocks: [],
    llm_config: {} as AgentState["llm_config"],
    memory: { blocks: [] } as AgentState["memory"],
    sources: [],
    tags: [],
  };
  const merged = { ...defaults, ...overrides };
  return merged as AgentState;
}
describe("reconcileExistingAgentState", () => {
  // Happy path: agent already has the default compaction model and exactly
  // the default base tools, so no update call (or even tool lookup) happens.
  test("does not update when compaction model and attached tools are already correct", async () => {
    const agent = mkAgentState({
      tools: [
        mkTool("tool-web", "web_search"),
        mkTool("tool-fetch", "fetch_webpage"),
      ],
      compaction_settings: {
        model: "letta/auto",
      },
    });
    const update = mock(() => Promise.resolve(agent));
    const list = mock(() => Promise.resolve({ items: [] as Tool[] }));
    const result = await reconcileExistingAgentState(
      {
        agents: { update },
        tools: { list },
      },
      agent,
    );
    expect(result.updated).toBe(false);
    expect(result.appliedTweaks).toEqual([]);
    expect(update).not.toHaveBeenCalled();
    // Both desired tools were already attached, so no server lookup needed.
    expect(list).not.toHaveBeenCalled();
  });
  // Drift path: blank compaction model and an extra non-default tool; both
  // tweaks should apply in a single update call.
  test("updates missing compaction model and enforces only default base tools", async () => {
    const initialAgent = mkAgentState({
      tools: [
        mkTool("tool-web", "web_search"),
        // Not in DEFAULT_ATTACHED_BASE_TOOLS — should be removed by the sync.
        mkTool("tool-convo", "conversation_search"),
      ],
      compaction_settings: {
        mode: "sliding_window",
        model: "",
      },
    });
    const updatedAgent = mkAgentState({
      tools: [
        mkTool("tool-web", "web_search"),
        mkTool("tool-fetch", "fetch_webpage"),
      ],
      compaction_settings: {
        mode: "sliding_window",
        model: "letta/auto",
      },
    });
    const update = mock((_agentID: string, _body: AgentUpdateParams) =>
      Promise.resolve(updatedAgent),
    );
    // Server only knows fetch_webpage; web_search resolves from the agent.
    const list = mock((query?: { name?: string | null }) => {
      if (query?.name === "fetch_webpage") {
        return Promise.resolve({
          items: [mkTool("tool-fetch", "fetch_webpage")],
        });
      }
      return Promise.resolve({ items: [] as Tool[] });
    });
    const result = await reconcileExistingAgentState(
      {
        agents: { update },
        tools: { list },
      },
      initialAgent,
    );
    expect(result.updated).toBe(true);
    expect(result.appliedTweaks).toEqual([
      "set_compaction_model",
      "sync_attached_tools",
    ]);
    expect(result.agent).toBe(updatedAgent);
    // Only the unattached tool triggers a lookup.
    expect(list).toHaveBeenCalledTimes(1);
    expect(list).toHaveBeenCalledWith({ name: "fetch_webpage", limit: 10 });
    expect(update).toHaveBeenCalledTimes(1);
    // Patch preserves the existing mode and fills in the default model.
    expect(update).toHaveBeenCalledWith("agent-test", {
      compaction_settings: {
        mode: "sliding_window",
        model: "letta/auto",
      },
      tool_ids: ["tool-web", "tool-fetch"],
    });
    // Guard against the default tool list changing silently.
    expect(DEFAULT_ATTACHED_BASE_TOOLS).toEqual([
      "web_search",
      "fetch_webpage",
    ]);
  });
});

View File

@@ -244,33 +244,57 @@ describe("resolveSubagentModel", () => {
expect(result).toBe("lc-anthropic/parent-model");
});
test("uses GLM-5 default for free tier even when subagent recommends another model", async () => {
test("uses auto default when available", async () => {
const result = await resolveSubagentModel({
recommendedModel: "sonnet-4.5",
billingTier: "free",
availableHandles: new Set(["zai/glm-5"]),
availableHandles: new Set(["letta/auto", "anthropic/test-model"]),
});
expect(result).toBe("zai/glm-5");
expect(result).toBe("letta/auto");
});
test("keeps inherit behavior for free tier", async () => {
test("uses auto-fast default for free tier when available", async () => {
const result = await resolveSubagentModel({
billingTier: "free",
availableHandles: new Set(["letta/auto-fast", "letta/auto"]),
});
expect(result).toBe("letta/auto-fast");
});
test("free tier falls back to auto when auto-fast is unavailable", async () => {
const result = await resolveSubagentModel({
billingTier: "free",
availableHandles: new Set(["letta/auto"]),
});
expect(result).toBe("letta/auto");
});
test("falls back when auto is unavailable", async () => {
const result = await resolveSubagentModel({
recommendedModel: "anthropic/test-model",
availableHandles: new Set(["anthropic/test-model"]),
});
expect(result).toBe("anthropic/test-model");
});
test("keeps inherit behavior when auto is unavailable", async () => {
const result = await resolveSubagentModel({
recommendedModel: "inherit",
parentModelHandle: "openai/gpt-5",
billingTier: "free",
availableHandles: new Set(["openai/gpt-5"]),
});
expect(result).toBe("openai/gpt-5");
});
test("user-provided model still overrides free-tier default", async () => {
test("user-provided model still overrides default auto", async () => {
const result = await resolveSubagentModel({
userModel: "openai/gpt-5",
recommendedModel: "sonnet-4.5",
billingTier: "free",
availableHandles: new Set(["zai/glm-5", "openai/gpt-5"]),
availableHandles: new Set(["letta/auto", "openai/gpt-5"]),
});
expect(result).toBe("openai/gpt-5");