Files
pieced-portal/src/lib/litellm.ts
admin d375a099f0
All checks were successful
Build and Push / build (push) Successful in 1m26s
Limit by tenant and org
2026-05-02 23:43:02 +02:00

282 lines
9.0 KiB
TypeScript

// Base URL every request in this module is sent to. Taken from the
// LITELLM_INTERNAL_URL environment variable, falling back to the
// hard-coded default below when that variable is null/undefined.
const LITELLM_URL =
process.env.LITELLM_INTERNAL_URL ?? "http://litellm.inference.svc:4000";
// Key sent as the bearer token on every request made through litellmFetch.
// NOTE(review): the trailing `!` only silences the compiler. If the
// variable is unset the value is `undefined` at runtime and requests go
// out with "Bearer undefined" — nothing in this file checks for that.
const LITELLM_MASTER_KEY = process.env.LITELLM_MASTER_KEY!;
/**
 * Perform an authenticated JSON request against the LiteLLM proxy.
 *
 * The master key (`Authorization: Bearer …`) and a JSON content type are
 * applied as default headers; any header supplied through `init.headers`
 * overrides the default of the same name.
 *
 * @param path Path plus query string, appended verbatim to `LITELLM_URL`.
 * @param init Optional fetch options (method, body, extra headers, …).
 * @returns The response body parsed as JSON (not validated).
 * @throws Error when the response status is not OK; the message contains
 *   the path, the HTTP status and the raw response text.
 */
async function litellmFetch(path: string, init?: RequestInit) {
  const headers = new Headers({
    Authorization: `Bearer ${LITELLM_MASTER_KEY}`,
    "Content-Type": "application/json",
  });
  // Merge through `Headers` rather than object spread: `init.headers` may
  // be a plain object, an array of tuples or a `Headers` instance, and
  // spreading the latter two silently loses or mangles the entries.
  // `Headers` also matches names case-insensitively, so a caller's
  // "content-type" replaces the default instead of being sent alongside it.
  new Headers(init?.headers).forEach((value, name) => {
    headers.set(name, value);
  });

  const res = await fetch(`${LITELLM_URL}${path}`, { ...init, headers });
  if (!res.ok) {
    throw new Error(`LiteLLM ${path}: ${res.status} ${await res.text()}`);
  }
  return res.json();
}
/**
 * Fetch LiteLLM's `/team/info` record for one team.
 *
 * @param teamId Team identifier; URL-encoded before being placed in the
 *   query string.
 * @returns The parsed JSON response, passed through unvalidated.
 */
export async function getTeamInfo(teamId: string) {
  const encodedTeamId = encodeURIComponent(teamId);
  const path = `/team/info?team_id=${encodedTeamId}`;
  return litellmFetch(path);
}
/**
 * Fetch spend logs for a team from `/global/spend/logs`.
 *
 * @param teamId Team whose logs are requested.
 * @param startDate Optional lower bound, sent as `start_date` when non-empty.
 * @param endDate Optional upper bound, sent as `end_date` when non-empty.
 * @returns The parsed JSON response, passed through unvalidated.
 */
export async function getTeamSpendLogs(
  teamId: string,
  startDate?: string,
  endDate?: string
) {
  const query = new URLSearchParams();
  query.append("team_id", teamId);

  // Date bounds are only sent when the caller supplied a non-empty value.
  const optionalBounds: Array<[string, string | undefined]> = [
    ["start_date", startDate],
    ["end_date", endDate],
  ];
  for (const [name, value] of optionalBounds) {
    if (value) query.set(name, value);
  }

  return litellmFetch(`/global/spend/logs?${query.toString()}`);
}
/**
 * Fetch a single page of a team's spend logs from `/spend/logs/v2`,
 * optionally restricted to one virtual key identified by its alias.
 *
 * Background (Slice 2 / Bug 19)
 * -----------------------------
 * LiteLLM teams are org-scoped (one team per org) and every tenant in
 * the org owns a virtual key whose `key_alias` is the tenant CR name.
 * Called without `keyAlias`, this returns spend for the whole team,
 * i.e. every tenant of the org mixed together. The per-tenant usage
 * view therefore passes `keyAlias`, which is forwarded as LiteLLM's
 * native `key_alias` query parameter so the filtering happens
 * server-side (availability on `/spend/logs/v2` was confirmed via
 * OpenAPI introspection).
 *
 * Why server-side filtering
 * -------------------------
 * The earlier implementation downloaded every team page and filtered
 * by alias in JS. That cost O(team_total) memory per request even for
 * a single tenant, and — worse — when the customer dashboard called
 * it without an explicit alias, the route's "first visible tenant"
 * fallback made both Acme tenants show identical numbers, because the
 * alias was always the first tenant in the visible list regardless of
 * the page being viewed.
 *
 * Resolving the correct tenant and alias is the route layer's job;
 * this function only forwards what it is given.
 *
 * @param teamId Team whose logs are requested.
 * @param startDate Start day; ` 00:00:00` is appended before sending.
 * @param endDate End day; ` 23:59:59` is appended before sending.
 * @param page Page number to request (defaults to 1).
 * @param pageSize Number of entries per page (defaults to 100).
 * @param keyAlias Optional key alias; omitted from the request when
 *   null, undefined or empty.
 * @returns The parsed JSON response, passed through unvalidated.
 */
export async function getTeamSpendLogsV2(
  teamId: string,
  startDate: string,
  endDate: string,
  page: number = 1,
  pageSize: number = 100,
  keyAlias?: string | null
) {
  const entries: [string, string][] = [
    ["team_id", teamId],
    ["start_date", `${startDate} 00:00:00`],
    ["end_date", `${endDate} 23:59:59`],
    ["page", String(page)],
    ["page_size", String(pageSize)],
  ];
  if (keyAlias) {
    entries.push(["key_alias", keyAlias]);
  }

  const query = new URLSearchParams(entries);
  return litellmFetch(`/spend/logs/v2?${query.toString()}`);
}
/**
 * Get all teams registered in LiteLLM via `/team/list`.
 *
 * The response is accepted in three shapes: a bare array, `{ data: [...] }`
 * or `{ teams: [...] }`. The first of those that is actually an array wins;
 * anything else yields an empty list, so callers can always iterate the
 * result.
 *
 * @returns The team objects as returned by LiteLLM (unvalidated; other
 *   code in this module reads `team_id`/`id` and `spend` from them).
 * @throws Error when the underlying request fails.
 */
export async function listTeams(): Promise<any[]> {
  const data = await litellmFetch("/team/list");
  // Check each candidate with Array.isArray instead of `??`, so a
  // non-null, non-array `data.data` can no longer leak out as the result.
  const candidates = [data, data?.data, data?.teams];
  const teams = candidates.find((candidate) => Array.isArray(candidate));
  return teams ?? [];
}
/**
 * Find a virtual key on a team by its alias and return its current
 * state (token, spend, budget cap, reset cadence). Returns null if
 * the alias doesn't match any key on the team.
 *
 * Why we need this
 * ----------------
 * Per-tenant budgets live on the virtual key, not the team. The
 * portal needs to:
 *   1. Display the current key's `max_budget` / `budget_duration` /
 *      `spend` on the tenant detail page.
 *   2. Pass the key's `token` to `/key/update` when the customer
 *      changes the budget.
 *
 * The token is opaque to the customer; the operator's
 * `FindKeyByAlias` does the same lookup for stale-key cleanup.
 *
 * Pagination
 * ----------
 * `/key/list` is requested page by page (100 keys per page) and the
 * search continues while the response advertises further pages through
 * `total_pages`. Responses without that field are treated as a single
 * page. NOTE(review): page/size/total_pages follow the LiteLLM API
 * reference — confirm against the deployed LiteLLM version.
 *
 * @param teamId Team that owns the key.
 * @param keyAlias Alias to match exactly against `key_alias`/`keyAlias`.
 * @returns The matching key's state, or null when no key with a usable
 *   `token` carries that alias.
 * @throws Error when a request to LiteLLM fails.
 */
export async function findKeyByAlias(
  teamId: string,
  keyAlias: string
): Promise<{
  token: string;
  spend: number;
  maxBudget: number | null;
  budgetDuration: string | null;
} | null> {
  const pageSize = 100;
  // Hard stop so an inconsistent `total_pages` can never loop forever.
  const maxPages = 100;

  for (let page = 1; page <= maxPages; page++) {
    const params = new URLSearchParams({
      team_id: teamId,
      return_full_object: "true",
      include_team_keys: "true",
      page: String(page),
      size: String(pageSize),
    });
    const data = await litellmFetch(`/key/list?${params}`);

    // Tolerate { keys: [...] }, { data: [...] } and bare arrays.
    const keys: any[] = Array.isArray(data?.keys)
      ? data.keys
      : Array.isArray(data?.data)
      ? data.data
      : Array.isArray(data)
      ? data
      : [];

    for (const k of keys) {
      // Raw-string entries carry no alias and cannot be matched.
      if (typeof k !== "object" || k === null) continue;
      const alias = k.key_alias ?? k.keyAlias;
      if (alias !== keyAlias) continue;
      if (typeof k.token !== "string" || !k.token) continue;

      // A budget of 0 is a real cap and must survive parsing, so test
      // for finiteness rather than truthiness. Missing, blank or
      // unparseable values mean "no cap".
      const rawBudget = k.max_budget;
      let maxBudget: number | null = null;
      const isBlank = typeof rawBudget === "string" && rawBudget.trim() === "";
      if (rawBudget != null && !isBlank) {
        const parsed =
          typeof rawBudget === "number" ? rawBudget : Number(rawBudget);
        maxBudget = Number.isFinite(parsed) ? parsed : null;
      }

      return {
        token: k.token,
        spend: typeof k.spend === "number" ? k.spend : Number(k.spend) || 0,
        maxBudget,
        budgetDuration:
          typeof k.budget_duration === "string" ? k.budget_duration : null,
      };
    }

    const totalPages = Number(data?.total_pages);
    if (
      keys.length === 0 ||
      !Number.isFinite(totalPages) ||
      page >= totalPages
    ) {
      break;
    }
  }
  return null;
}
/**
 * Change a virtual key's budget cap and/or its reset duration through
 * `/key/update`.
 *
 * Only the fields present in `changes` are sent. `maxBudget: null`
 * removes the cap; `budgetDuration: null` makes the budget never reset
 * (lifetime cap). A field left `undefined` is not included in the
 * request at all.
 *
 * The key is identified by the `key` parameter — either the raw
 * `sk-...` token or its hash (LiteLLM accepts both shapes on
 * /key/update). The portal flow uses the hash returned by
 * `findKeyByAlias`.
 *
 * @param key Token or hash identifying the key to update.
 * @param changes Budget fields to change.
 * @throws Error when the request to LiteLLM fails.
 */
export async function updateKeyBudget(
  key: string,
  changes: {
    maxBudget?: number | null;
    budgetDuration?: string | null;
  }
): Promise<void> {
  const { maxBudget, budgetDuration } = changes;

  // `undefined` means "leave untouched"; `null` is a meaningful value
  // and is forwarded as-is.
  const payload: Record<string, any> = {
    key,
    ...(maxBudget === undefined ? {} : { max_budget: maxBudget }),
    ...(budgetDuration === undefined
      ? {}
      : { budget_duration: budgetDuration }),
  };

  await litellmFetch("/key/update", {
    method: "POST",
    body: JSON.stringify(payload),
  });
}
/**
 * Get LiteLLM health status from `/health`. Never throws.
 *
 * NOTE(review): `healthy` only reflects that the request succeeded with
 * an OK status; the payload itself is returned in `details` but is not
 * inspected for unhealthy backends.
 *
 * @returns `{ healthy: true, details }` with the parsed response on
 *   success, or `{ healthy: false, details }` with the failure message
 *   when the request fails.
 */
export async function getLitellmHealth(): Promise<{
  healthy: boolean;
  details?: any;
}> {
  try {
    const data = await litellmFetch("/health");
    return { healthy: true, details: data };
  } catch (e: unknown) {
    // Narrow before reading `.message` so a non-Error throw still yields
    // a usable description instead of `undefined`.
    const details = e instanceof Error ? e.message : String(e);
    return { healthy: false, details };
  }
}
/**
 * Get the global spend figure reported by LiteLLM's `/global/spend`.
 *
 * NOTE(review): earlier documentation described this as spend across all
 * teams for the current month; the time window is decided by LiteLLM and
 * cannot be confirmed from this code.
 *
 * Accepts a bare number, `{ spend }` or `{ total_spend }`. Numeric
 * strings are converted; anything that is not a finite number counts as 0.
 * Best-effort: a failed request is logged and reported as 0 rather than
 * thrown.
 *
 * @returns The spend as a finite number, or 0 when unavailable.
 */
export async function getGlobalSpend(): Promise<number> {
  try {
    const data = await litellmFetch("/global/spend");
    const raw =
      typeof data === "number" ? data : data?.spend ?? data?.total_spend ?? 0;
    // Coerce so the declared `number` return type holds even when the
    // API serialises the amount as a string.
    const spend = typeof raw === "number" ? raw : Number(raw);
    return Number.isFinite(spend) ? spend : 0;
  } catch (e) {
    // Log the failure so an outage is distinguishable from zero spend.
    console.warn("getGlobalSpend failed, returning 0:", e);
    return 0;
  }
}
/**
 * Build a map of teamId → spend (CHF) from the `/team/list` data
 * returned by `listTeams`, which includes current spend per team.
 *
 * Since Slice 2, a "team" is the company-level budget shared across all
 * tenants of the same ZITADEL org, so the values are company totals, not
 * per-tenant spend. For per-tenant attribution, use {@link getPerKeySpend}.
 *
 * @returns Map keyed by `team_id` (or `id` when `team_id` is absent).
 *   Teams without an identifier are skipped; a missing spend counts as 0.
 * @throws Error when listing teams fails.
 */
export async function getPerTeamSpend(): Promise<Map<string, number>> {
  const spendByTeam = new Map<string, number>();
  for (const { team_id, id, spend } of await listTeams()) {
    const teamId = team_id ?? id;
    if (!teamId) continue;
    spendByTeam.set(teamId, spend ?? 0);
  }
  return spendByTeam;
}
/**
 * Fetch per-virtual-key spend as a map: keyAlias → spend (CHF).
 *
 * Since Slice 2, each PiecedTenant CR owns one virtual key under its
 * org's team, with `key_alias = tenant.metadata.name`. Filtering by the
 * key alias is how we get genuinely per-tenant spend.
 *
 * Implementation
 * --------------
 * Calls `/key/list` with `return_full_object=true&include_team_keys=true`,
 * which returns objects with `spend` and `key_alias`. The listing is
 * read page by page (100 keys per page) for as long as the response
 * advertises further pages through `total_pages`; responses without
 * that field are treated as a single page. NOTE(review): page/size/
 * total_pages follow the LiteLLM API reference — confirm against the
 * deployed LiteLLM version.
 *
 * Older LiteLLM builds may return raw token strings instead of objects —
 * those entries are skipped. Any request failure degrades to an empty
 * map rather than throwing, since the admin health page should still
 * render even if per-tenant numbers are temporarily unavailable.
 *
 * @returns Map<keyAlias, spend>. May be empty if the LiteLLM build
 *   doesn't expose key-alias info or a request failed; callers must
 *   handle that.
 */
export async function getPerKeySpend(): Promise<Map<string, number>> {
  const pageSize = 100;
  // Hard stop so an inconsistent `total_pages` can never loop forever.
  const maxPages = 100;
  const map = new Map<string, number>();

  try {
    for (let page = 1; page <= maxPages; page++) {
      const params = new URLSearchParams({
        return_full_object: "true",
        include_team_keys: "true",
        page: String(page),
        size: String(pageSize),
      });
      const data = await litellmFetch(`/key/list?${params}`);

      // Response shape: { keys: [ { key_alias, spend, token, ... } ] }
      // or sometimes { data: [...] }, or raw arrays. Be tolerant.
      const keys: any[] = Array.isArray(data?.keys)
        ? data.keys
        : Array.isArray(data?.data)
        ? data.data
        : Array.isArray(data)
        ? data
        : [];

      for (const k of keys) {
        // Skip raw-string entries from older API shapes — we can't
        // attribute them.
        if (typeof k !== "object" || k === null) continue;
        const alias = k.key_alias ?? k.keyAlias;
        if (typeof alias !== "string" || !alias) continue;
        const spend =
          typeof k.spend === "number" ? k.spend : Number(k.spend) || 0;
        map.set(alias, spend);
      }

      const totalPages = Number(data?.total_pages);
      if (
        keys.length === 0 ||
        !Number.isFinite(totalPages) ||
        page >= totalPages
      ) {
        break;
      }
    }
    return map;
  } catch (e) {
    console.warn("getPerKeySpend failed, returning empty map:", e);
    // All-or-nothing: never hand back a partially filled map from a run
    // that failed midway through the pages.
    return new Map<string, number>();
  }
}