import { debugWarn } from "../utils/debug";
import { getClient } from "./client";

/** How long a fetched model list is treated as fresh. */
const CACHE_TTL_MS = 5 * 60 * 1000; // 5 minutes

/** Result of one successful model-list fetch. */
type CacheEntry = {
  handles: Set<string>;
  contextWindows: Map<string, number>; // handle -> max_context_window
  fetchedAt: number;
};

/** Most recent successful fetch, or null if nothing has been cached (or it was cleared). */
let cache: CacheEntry | null = null;

/** The network fetch currently in progress, shared by concurrent non-forced callers. */
let inflight: Promise<CacheEntry> | null = null;

/**
 * Whether a cache entry exists and is younger than CACHE_TTL_MS.
 *
 * @param now - Timestamp (ms since epoch) to measure the cache age against.
 */
function isFresh(now = Date.now()): boolean {
  return cache !== null && now - cache.fetchedAt < CACHE_TTL_MS;
}

export type AvailableModelHandlesResult = {
  handles: Set<string>;
  source: "cache" | "network";
  fetchedAt: number;
};

/**
 * Drop the cached model list.
 * Does not touch a fetch that is already in flight; when that fetch
 * resolves it will populate the cache again.
 */
export function clearAvailableModelsCache(): void {
  cache = null;
}

/**
 * Report the current state of the cache without triggering a fetch.
 *
 * @returns Presence, freshness, fetch timestamp and age of the cache
 *   (`fetchedAt` and `ageMs` are null when nothing is cached), plus the TTL.
 */
export function getAvailableModelsCacheInfo(): {
  hasCache: boolean;
  isFresh: boolean;
  fetchedAt: number | null;
  ageMs: number | null;
  ttlMs: number;
} {
  const now = Date.now();
  return {
    hasCache: cache !== null,
    isFresh: isFresh(now),
    fetchedAt: cache?.fetchedAt ?? null,
    ageMs: cache ? now - cache.fetchedAt : null,
    ttlMs: CACHE_TTL_MS,
  };
}

/**
 * Return cached model handles if available.
 * Used by UI components to bootstrap from cache without showing a loading flash.
 *
 * @returns A copy of the cached handle set (regardless of freshness), or null
 *   when nothing is cached.
 */
export function getCachedModelHandles(): Set<string> | null {
  if (!cache) {
    return null;
  }
  return new Set(cache.handles);
}

/**
 * Provider response from /v1/providers/ endpoint
 */
type Provider = {
  id: string;
  name: string;
  provider_type: string;
  provider_category?: "base" | "byok" | null;
};

/**
 * Refresh BYOK providers to get the latest models from their APIs.
 * This calls PATCH /v1/providers/{provider_id}/refresh for each BYOK provider.
 * Errors are logged but don't fail the overall refresh (best-effort).
 *
 * Only a failure of `getClient()` itself propagates to the caller; it is
 * awaited outside the try block.
 */
async function refreshByokProviders(): Promise<void> {
  const client = await getClient();

  try {
    // List all providers
    const providers = await client.get<Provider[]>("/v1/providers/");

    // Filter to BYOK providers only
    const byokProviders = providers.filter(
      (p) => p.provider_category === "byok",
    );

    // Refresh each BYOK provider in parallel (best-effort, don't fail on errors)
    await Promise.allSettled(
      byokProviders.map(async (provider) => {
        try {
          await client.patch(`/v1/providers/${provider.id}/refresh`);
        } catch (error) {
          // Log but don't throw - refresh is best-effort
          debugWarn(
            "available-models",
            `Failed to refresh provider ${provider.name} (${provider.id}):`,
            error,
          );
        }
      }),
    );
  } catch (error) {
    // If we can't list providers, just log and continue
    // This might happen on self-hosted servers without the providers endpoint
    debugWarn(
      "available-models",
      "Failed to list providers for refresh:",
      error,
    );
  }
}

/**
 * Fetch the model list from the API and build a cache entry from it.
 * Models without a handle are skipped; a context window is recorded only
 * when the model reports a truthy `max_context_window`.
 *
 * @returns A new entry stamped with the time the fetch completed.
 */
async function fetchFromNetwork(): Promise<CacheEntry> {
  const client = await getClient();
  const modelsList = await client.models.list();
  const handles = new Set(
    modelsList.map((m) => m.handle).filter((h): h is string => !!h),
  );

  // Build context window map from API response
  const contextWindows = new Map<string, number>();
  for (const model of modelsList) {
    if (model.handle && model.max_context_window) {
      contextWindows.set(model.handle, model.max_context_window);
    }
  }

  return { handles, contextWindows, fetchedAt: Date.now() };
}

/**
 * Get the set of available model handles, using the cache when possible.
 *
 * Without `forceRefresh`: a fresh cache entry is returned immediately;
 * otherwise an in-flight fetch is joined if there is one; otherwise a new
 * fetch is started. With `forceRefresh`, both shortcuts are skipped and BYOK
 * providers are refreshed before the model list is fetched.
 *
 * The returned `handles` Set is the instance held by the cache, not a copy,
 * so callers must not mutate it.
 *
 * @param options.forceRefresh - Bypass the cache and any in-flight fetch.
 * @returns The handles, where they came from, and when they were fetched.
 *   Callers that join an in-flight fetch are reported as `"network"`.
 * @throws Whatever the underlying fetch rejects with; the cache is left
 *   unchanged in that case.
 */
export async function getAvailableModelHandles(options?: {
  forceRefresh?: boolean;
}): Promise<AvailableModelHandlesResult> {
  const forceRefresh = options?.forceRefresh === true;
  const now = Date.now();

  if (!forceRefresh && isFresh(now) && cache) {
    return {
      handles: cache.handles,
      source: "cache",
      fetchedAt: cache.fetchedAt,
    };
  }

  if (!forceRefresh && inflight) {
    const entry = await inflight;
    return {
      handles: entry.handles,
      source: "network",
      fetchedAt: entry.fetchedAt,
    };
  }

  // When forceRefresh is true, first refresh BYOK providers to get latest models
  // This matches the behavior in ADE (letta-cloud) where refresh is called before listing models
  if (forceRefresh) {
    await refreshByokProviders();
  }

  const request: Promise<CacheEntry> = fetchFromNetwork()
    .then((entry) => {
      cache = entry;
      return entry;
    })
    .finally(() => {
      // A forced refresh may have replaced `inflight` with a newer request
      // while this one was pending; only clear the slot if it is still ours.
      if (inflight === request) {
        inflight = null;
      }
    });
  inflight = request;

  const entry = await request;
  return {
    handles: entry.handles,
    source: "network",
    fetchedAt: entry.fetchedAt,
  };
}

/**
 * Best-effort prefetch to warm the cache (no throw).
 * This is intentionally fire-and-forget.
 */
export function prefetchAvailableModelHandles(): void {
  void getAvailableModelHandles().catch(() => {
    // Ignore failures; UI will handle errors on-demand.
  });
}

/**
 * Get the max_context_window for a model handle from the API.
 * Ensures the cache is populated before reading; an existing cache entry is
 * used as-is, even if it is older than the TTL.
 *
 * @param handle - Model handle to look up.
 * @returns The recorded context window, or undefined if the handle was not
 *   found in the API response.
 * @throws Whatever `getAvailableModelHandles` rejects with when the cache
 *   has to be populated first.
 */
export async function getModelContextWindow(
  handle: string,
): Promise<number | undefined> {
  if (!cache) {
    await getAvailableModelHandles();
  }
  return cache?.contextWindows.get(handle);
}