Files
letta-code/src/agent/available-models.ts
2026-02-17 21:27:13 -08:00

195 lines
5.0 KiB
TypeScript

import { debugWarn } from "../utils/debug";
import { getClient } from "./client";
// How long a fetched model list counts as fresh (see isFresh) before a new
// network fetch is needed.
const CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes
// Snapshot produced by one successful model-list fetch.
type CacheEntry = {
// Every non-empty model handle present in the fetched list.
handles: Set<string>;
contextWindows: Map<string, number>; // handle -> max_context_window
// Date.now() timestamp (ms) taken when the snapshot was built.
fetchedAt: number;
};
// Most recent successful snapshot, or null when nothing has been fetched yet
// or the cache was cleared.
let cache: CacheEntry | null = null;
// Pending network fetch, kept so concurrent callers can await the same
// request instead of starting another one; null when no fetch is running.
let inflight: Promise<CacheEntry> | null = null;
/**
 * Report whether a cache entry exists and is younger than CACHE_TTL_MS.
 *
 * @param now - Reference timestamp in ms; defaults to the current time.
 * @returns false when there is no cache entry or it has reached the TTL.
 */
function isFresh(now = Date.now()): boolean {
  if (cache === null) {
    return false;
  }
  const ageMs = now - cache.fetchedAt;
  return ageMs < CACHE_TTL_MS;
}
/**
 * Result returned by getAvailableModelHandles.
 */
export type AvailableModelHandlesResult = {
// Handles of the models in the snapshot this result was built from.
handles: Set<string>;
// "cache" when served from the in-memory cache without waiting on a fetch;
// "network" when the caller awaited a (new or already running) fetch.
source: "cache" | "network";
// Date.now() timestamp (ms) of the snapshot that produced `handles`.
fetchedAt: number;
};
/**
 * Drop the cached model list so the next lookup has to hit the network.
 *
 * Only the stored snapshot is discarded; a fetch that is already in flight
 * is not cancelled and will repopulate the cache when it completes.
 */
export function clearAvailableModelsCache(): void {
  cache = null;
}
/**
 * Describe the current state of the model cache without touching the network.
 *
 * @returns `hasCache`/`isFresh` flags, the snapshot timestamp and its age in
 *   ms (both null when nothing is cached), and the configured TTL.
 */
export function getAvailableModelsCacheInfo(): {
  hasCache: boolean;
  isFresh: boolean;
  fetchedAt: number | null;
  ageMs: number | null;
  ttlMs: number;
} {
  // Sample the clock once so `isFresh` and `ageMs` agree with each other.
  const now = Date.now();
  const fresh = isFresh(now);

  if (!cache) {
    return {
      hasCache: false,
      isFresh: fresh,
      fetchedAt: null,
      ageMs: null,
      ttlMs: CACHE_TTL_MS,
    };
  }

  const snapshotTime = cache.fetchedAt;
  return {
    hasCache: true,
    isFresh: fresh,
    fetchedAt: snapshotTime ?? null,
    ageMs: now - snapshotTime,
    ttlMs: CACHE_TTL_MS,
  };
}
/**
 * Return the cached model handles, if any, regardless of cache age.
 * Lets UI components bootstrap from the cache without showing a loading flash.
 *
 * @returns A fresh copy of the cached handle set (so callers cannot mutate
 *   the cache), or null when nothing has been cached.
 */
export function getCachedModelHandles(): Set<string> | null {
  return cache ? new Set(cache.handles) : null;
}
/**
 * Provider response from /v1/providers/ endpoint.
 * Only the fields declared here are typed; within this file `id` builds the
 * refresh URL, `name` appears in log messages, and `provider_category`
 * selects which providers get refreshed.
 */
type Provider = {
id: string;
name: string;
provider_type: string;
// Providers whose category is "byok" are the ones refreshByokProviders refreshes.
provider_category?: "base" | "byok" | null;
};
/**
 * Refresh BYOK providers to get the latest models from their APIs.
 * This calls PATCH /v1/providers/{provider_id}/refresh for each provider
 * whose `provider_category` is "byok".
 *
 * Best-effort: failures while obtaining the client, listing providers, or
 * refreshing an individual provider are logged with debugWarn and swallowed,
 * so a failed refresh does not fail the caller's overall refresh.
 */
async function refreshByokProviders(): Promise<void> {
  try {
    // Obtain the client inside the try block: a failure here must be treated
    // like any other refresh failure (logged, not thrown) to honor the
    // best-effort contract.
    const client = await getClient();

    // List all providers
    const providers = await client.get<Provider[]>("/v1/providers/");

    // Filter to BYOK providers only
    const byokProviders = providers.filter(
      (p) => p.provider_category === "byok",
    );

    // Refresh each BYOK provider in parallel (best-effort, don't fail on errors)
    await Promise.allSettled(
      byokProviders.map(async (provider) => {
        try {
          await client.patch<Provider>(`/v1/providers/${provider.id}/refresh`);
        } catch (error) {
          // Log but don't throw - one provider failing must not block the others
          debugWarn(
            "available-models",
            `Failed to refresh provider ${provider.name} (${provider.id}):`,
            error,
          );
        }
      }),
    );
  } catch (error) {
    // If we can't list providers, just log and continue
    // This might happen on self-hosted servers without the providers endpoint
    debugWarn(
      "available-models",
      "Failed to list providers for refresh:",
      error,
    );
  }
}
/**
 * Fetch the model list from the API and turn it into a cache snapshot.
 *
 * Models without a handle are skipped. A handle is only added to the
 * context-window map when the model reports a truthy `max_context_window`.
 *
 * @returns A new snapshot stamped with the time it was built. The module
 *   cache is not modified here; storing the snapshot is left to the caller.
 */
async function fetchFromNetwork(): Promise<CacheEntry> {
  const client = await getClient();
  const models = await client.models.list();

  const handles = new Set<string>();
  const contextWindows = new Map<string, number>();

  // Single pass: collect the handle and, when present, its context window.
  for (const model of models) {
    const handle = model.handle;
    if (!handle) {
      continue;
    }
    handles.add(handle);

    const maxContextWindow = model.max_context_window;
    if (maxContextWindow) {
      contextWindows.set(handle, maxContextWindow);
    }
  }

  return { handles, contextWindows, fetchedAt: Date.now() };
}
/**
 * Build the public result for a snapshot.
 * The handle set is copied so callers cannot mutate the cached set.
 */
function toHandlesResult(
  entry: CacheEntry,
  source: AvailableModelHandlesResult["source"],
): AvailableModelHandlesResult {
  return {
    handles: new Set(entry.handles),
    source,
    fetchedAt: entry.fetchedAt,
  };
}

/**
 * Get the set of model handles currently available from the API.
 *
 * Without `forceRefresh`:
 *  - a fresh cache entry is returned immediately (`source: "cache"`);
 *  - otherwise, if a fetch is already running, its result is awaited and
 *    shared instead of starting a second request.
 * With `forceRefresh: true` the cache and any running fetch are bypassed,
 * BYOK providers are refreshed first, and a new fetch is always started.
 *
 * @param options.forceRefresh - Bypass the cache and in-flight request.
 * @returns The handles, where they came from, and the snapshot timestamp.
 * @throws Whatever the underlying network fetch rejects with.
 */
export async function getAvailableModelHandles(options?: {
  forceRefresh?: boolean;
}): Promise<AvailableModelHandlesResult> {
  const forceRefresh = options?.forceRefresh === true;

  if (!forceRefresh) {
    if (isFresh(Date.now()) && cache !== null) {
      return toHandlesResult(cache, "cache");
    }
    if (inflight !== null) {
      return toHandlesResult(await inflight, "network");
    }
  } else {
    // When forceRefresh is true, first refresh BYOK providers to get latest models
    // This matches the behavior in ADE (letta-cloud) where refresh is called before listing models
    await refreshByokProviders();
  }

  // A forced refresh can start while an older request is still pending.
  // Both the cache write and the `inflight` reset are therefore guarded by
  // promise identity, so a superseded request can neither overwrite the
  // newer result nor clear the newer request's `inflight` slot.
  // Trade-off: if the superseding request fails, the superseded request's
  // result is still returned to its own caller but is not cached.
  const request: Promise<CacheEntry> = fetchFromNetwork()
    .then((entry) => {
      if (inflight === request) {
        cache = entry;
      }
      return entry;
    })
    .finally(() => {
      if (inflight === request) {
        inflight = null;
      }
    });
  inflight = request;

  return toHandlesResult(await request, "network");
}
/**
 * Warm the model cache in the background.
 * Fire-and-forget by design: the returned promise is not awaited and this
 * function never throws.
 */
export function prefetchAvailableModelHandles(): void {
  const ignoreFailure = (): void => {
    // Ignore failures; UI will handle errors on-demand.
  };
  void getAvailableModelHandles().catch(ignoreFailure);
}
/**
 * Look up the max_context_window the API reported for a model handle.
 *
 * If nothing has been cached yet, the cache is populated first; an existing
 * cache entry is used as-is, whatever its age.
 *
 * @param handle - Model handle to look up.
 * @returns The context window, or undefined if the handle is not in the
 *   cached API response.
 */
export async function getModelContextWindow(
  handle: string,
): Promise<number | undefined> {
  const cacheIsEmpty = cache === null;
  if (cacheIsEmpty) {
    await getAvailableModelHandles();
  }
  // Re-read the module cache: the call above may have just populated it.
  const snapshot = cache;
  return snapshot?.contextWindows.get(handle);
}