From 7b22bc4087b8cbbfc018e04950dee24de131fb75 Mon Sep 17 00:00:00 2001 From: admin Date: Sun, 26 Apr 2026 21:21:02 +0200 Subject: [PATCH] OneLiteLLM team per company+virt keys --- scripts/verify-find-key-by-alias.mjs | 64 +++++++++++++++++++++ src/app/[locale]/tenants/[name]/page.tsx | 13 ++++- src/app/api/admin/health/route.ts | 47 ++++++++++++---- src/app/api/usage/route.ts | 65 ++++++++++++++++++---- src/components/dashboard/usage-display.tsx | 21 +++++-- src/lib/litellm.ts | 55 ++++++++++++++++++ src/types/index.ts | 11 ++++ 7 files changed, 247 insertions(+), 29 deletions(-) create mode 100644 scripts/verify-find-key-by-alias.mjs diff --git a/scripts/verify-find-key-by-alias.mjs b/scripts/verify-find-key-by-alias.mjs new file mode 100644 index 0000000..3db18a1 --- /dev/null +++ b/scripts/verify-find-key-by-alias.mjs @@ -0,0 +1,64 @@ +// Smoke-test for the FindKeyByAlias parsing logic — runs the JSON +// permutations LiteLLM has been seen to emit through the unmarshal +// paths and confirms each ends up at the expected outcome. +// +// Since the operator can't run inside this sandbox, this is a +// JS port of the parsing flow. It exercises decisions the Go code +// makes line-for-line. + +const cases = [ + { + name: "newer object shape, alias matches", + body: { keys: [{ token: "tk-1", key_alias: "acme-abc12345" }, { token: "tk-2", key_alias: "beta-def67890" }] }, + expected: "tk-1", + }, + { + name: "newer object shape, alias does not match", + body: { keys: [{ token: "tk-2", key_alias: "beta-def67890" }] }, + expected: "", + }, + { + name: "newer object shape, empty keys array", + body: { keys: [] }, + expected: "", + }, + { + name: "older string shape — cannot filter, return empty", + body: { keys: ["sk-abc", "sk-def"] }, + expected: "", + }, + { + name: "matching alias but missing token field", + body: { keys: [{ key_alias: "acme-abc12345" }] }, + expected: "", + }, +]; + +function findKeyByAlias(body, keyAlias) { + // Mirror the Go logic exactly. 
+ let asObjects; + try { + asObjects = body; + if (!asObjects || !Array.isArray(asObjects.keys)) return ""; + for (const k of asObjects.keys) { + // Skip non-objects (= older string shape) + if (typeof k !== "object" || k === null) continue; + if (k.key_alias === keyAlias && k.token) { + return k.token; + } + } + } catch { + return ""; + } + return ""; +} + +let pass = 0, fail = 0; +for (const c of cases) { + const got = findKeyByAlias(c.body, "acme-abc12345"); + const ok = got === c.expected; + console.log(`${ok ? "PASS" : "FAIL"} got="${got}" want="${c.expected}" [${c.name}]`); + if (ok) pass++; else fail++; +} +console.log(`\n${pass} pass, ${fail} fail`); +process.exit(fail === 0 ? 0 : 1); diff --git a/src/app/[locale]/tenants/[name]/page.tsx b/src/app/[locale]/tenants/[name]/page.tsx index 3a674d8..486858c 100644 --- a/src/app/[locale]/tenants/[name]/page.tsx +++ b/src/app/[locale]/tenants/[name]/page.tsx @@ -41,11 +41,18 @@ export default async function TenantDetailPage({ ); const channelUsers = tenant.spec.channelUsers || {}; - // Admins inspecting another tenant's usage: pass teamId explicitly. - // Customers viewing their own: no teamId, backend resolves from session. + // Admins inspecting another tenant's usage: pass teamId AND keyAlias so + // the backend filters spend logs by this specific tenant's virtual key. + // Without keyAlias the response would include sibling tenants in the + // same org, since teams are now shared (Slice 2). + // Customers viewing their own: pass nothing — backend resolves both + // from the session-bound tenant. const usageTeamId = user.isPlatform ? tenant.status?.litellmTeamId || undefined : undefined; + const usageKeyAlias = user.isPlatform + ? tenant.status?.litellmKeyAlias || undefined + : undefined; return (
@@ -81,7 +88,7 @@ export default async function TenantDetailPage({

{t("usage")}

- + {/* Packages */} diff --git a/src/app/api/admin/health/route.ts b/src/app/api/admin/health/route.ts index 313cd38..e9bf238 100644 --- a/src/app/api/admin/health/route.ts +++ b/src/app/api/admin/health/route.ts @@ -4,6 +4,7 @@ import { listTenants } from "@/lib/k8s"; import { getLitellmHealth, getGlobalSpend, + getPerKeySpend, getPerTeamSpend, } from "@/lib/litellm"; @@ -28,6 +29,17 @@ async function checkVllmHealth(): Promise<{ /** * GET /api/admin/health * Returns system health overview for the admin panel. + * + * Slice 2 spend layout + * -------------------- + * - `spend.global` — total across all teams (LiteLLM-reported) + * - `spend.perTenant[name]` — per-tenant CHF, derived from the per-key + * spend map keyed by `litellmKeyAlias`. Only + * populated for tenants whose status carries + * an alias (post-Slice-2 reconciled CRs). + * - `spend.perOrg[teamId]` — company-level total (= LiteLLM team total). + * Useful for the admin overview to see + * spend-per-customer at a glance. */ export async function GET() { try { @@ -36,17 +48,17 @@ export async function GET() { return NextResponse.json({ error: "Forbidden" }, { status: 403 }); } - const [tenants, litellm, vllm, globalSpend, perTeamSpend] = + const [tenants, litellm, vllm, globalSpend, perKeySpend, perTeamSpend] = await Promise.allSettled([ listTenants(), getLitellmHealth(), checkVllmHealth(), getGlobalSpend(), + getPerKeySpend(), getPerTeamSpend(), ]); - const allTenants = - tenants.status === "fulfilled" ? tenants.value : []; + const allTenants = tenants.status === "fulfilled" ? tenants.value : []; // Count tenants by phase const phaseCounts: Record = {}; @@ -57,15 +69,27 @@ export async function GET() { phaseCounts[phase] = (phaseCounts[phase] || 0) + 1; } - // Build per-tenant spend map (tenantName → spend) - const spendMap: Record = {}; + // Build per-tenant spend map (tenantName → spend) from the per-key map. 
+ // Tenants without a `litellmKeyAlias` in status are skipped — they + // simply won't appear in this map until they've been reconciled by + // the Slice-2 operator. + const keySpend = + perKeySpend.status === "fulfilled" ? perKeySpend.value : new Map(); + const tenantSpend: Record = {}; + for (const t of allTenants) { + const alias = t.status?.litellmKeyAlias; + if (alias && keySpend.has(alias)) { + tenantSpend[t.metadata.name] = keySpend.get(alias)!; + } + } + + // Build per-org spend map (teamId → spend). Tenants of the same org + // share a teamId, so each entry is that org's combined total. const teamSpend = perTeamSpend.status === "fulfilled" ? perTeamSpend.value : new Map(); - for (const t of allTenants) { - const teamId = t.status?.litellmTeamId; - if (teamId && teamSpend.has(teamId)) { - spendMap[t.metadata.name] = teamSpend.get(teamId)!; - } + const orgSpend: Record = {}; + for (const [teamId, spend] of teamSpend.entries()) { + orgSpend[teamId] = spend; } return NextResponse.json({ @@ -76,7 +100,8 @@ export async function GET() { spend: { global: globalSpend.status === "fulfilled" ? globalSpend.value : 0, - perTenant: spendMap, + perTenant: tenantSpend, + perOrg: orgSpend, }, services: { litellm: diff --git a/src/app/api/usage/route.ts b/src/app/api/usage/route.ts index de060af..446e50d 100644 --- a/src/app/api/usage/route.ts +++ b/src/app/api/usage/route.ts @@ -7,9 +7,21 @@ import { safeError } from "@/lib/errors"; /** * GET /api/usage * - * Customers: teamId is resolved server-side from the tenant matching the - * user's orgId. No client-supplied teamId accepted. - * Platform admins: may pass ?teamId=... to inspect any tenant's usage. + * Customers: tenant resolved server-side from the user's orgId. The + * response is filtered by the tenant's `litellmKeyAlias` so + * sibling tenants in the same org don't bleed into the total. + * Platform admins: may pass ?teamId=... to inspect any team. They may + * also pass ?keyAlias=... 
to scope to a single tenant. + * + * Slice 2 note + * ------------ + * LiteLLM teams are now shared across all tenants of an org. The team's + * `/team/info` budget is the *company* budget; the per-tenant numbers + * come from filtering spend logs by `key_alias`. If a tenant has no + * `litellmKeyAlias` in status (transitional state right after upgrade, + * before the operator has reconciled), we fall back to team-level + * filtering — the numbers will be slightly inflated for that one + * reconcile cycle. */ export async function GET(req: NextRequest) { const user = await getSessionUser(); @@ -17,13 +29,14 @@ export async function GET(req: NextRequest) { return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); let teamId: string | null = null; + let keyAlias: string | null = null; if (user.isPlatform) { - // Admins may pass a specific teamId to inspect any tenant teamId = req.nextUrl.searchParams.get("teamId") ?? null; + keyAlias = req.nextUrl.searchParams.get("keyAlias") ?? null; } - // For customers (or admins without explicit teamId): resolve from their tenant + // For customers (or admins without explicit params): resolve from their tenant. if (!teamId) { const tenants = await listTenants(); const orgTenant = tenants.find( @@ -37,6 +50,13 @@ export async function GET(req: NextRequest) { ); } teamId = orgTenant.status.litellmTeamId; + + // If the operator has populated the per-tenant key alias, filter by it. + // Falling back to team-level (no alias) will return the org total, which + // is acceptable transitionally but means siblings' usage shows up here. + if (orgTenant.status.litellmKeyAlias) { + keyAlias = orgTenant.status.litellmKeyAlias; + } } // Month param: YYYY-MM, defaults to current month @@ -55,7 +75,11 @@ export async function GET(req: NextRequest) { try { const teamInfo = await getTeamInfo(teamId); - // Fetch all pages + // Fetch all pages from the team. 
We always query at the team level — + // LiteLLM's /spend/logs/v2 doesn't filter by key_alias reliably across + // versions, so we paginate and post-filter in code. For pilot scale + // this is cheap; if a single team ever exceeds ~10k entries/month we + // can revisit. const allRequests: any[] = []; let page = 1; while (true) { @@ -71,12 +95,26 @@ export async function GET(req: NextRequest) { page++; } + // Apply key_alias post-filter when scoping to a single tenant. Match + // `key_alias` (newer LiteLLM), `metadata.user_api_key_alias` (older + // builds nest it inside metadata), then `api_key_alias` as a fallback. + const scoped = keyAlias + ? allRequests.filter((r) => { + const alias = + r.key_alias ?? + r.metadata?.user_api_key_alias ?? + r.api_key_alias ?? + null; + return alias === keyAlias; + }) + : allRequests; + // Aggregate by day const byDay: Record< string, { inputTokens: number; outputTokens: number; spend: number } > = {}; - for (const r of allRequests) { + for (const r of scoped) { const day = (r.startTime || r.endTime || "").slice(0, 10); if (!day) continue; if (!byDay[day]) @@ -90,25 +128,30 @@ export async function GET(req: NextRequest) { .sort(([a], [b]) => a.localeCompare(b)) .map(([date, d]) => ({ date, ...d })); - const totalInput = allRequests.reduce( + const totalInput = scoped.reduce( (s, r) => s + (r.prompt_tokens || 0), 0 ); - const totalOutput = allRequests.reduce( + const totalOutput = scoped.reduce( (s, r) => s + (r.completion_tokens || 0), 0 ); - const totalSpend = allRequests.reduce((s, r) => s + (r.spend || 0), 0); + const totalSpend = scoped.reduce((s, r) => s + (r.spend || 0), 0); return NextResponse.json({ teamId, + keyAlias, // null when not filtering — useful for the client to know it sees company-wide data month: monthParam, currentPeriod: { inputTokens: totalInput, outputTokens: totalOutput, totalSpend, - requestCount: allRequests.length, + requestCount: scoped.length, }, + // Budget is always team-level (= company budget). 
Spend reported + // here is the team total, not the per-key total — the customer + // wants to see "how much of our company budget is left", not just + // "how much has this one tenant cost". budget: { maxBudget: teamInfo?.team_info?.max_budget ?? null, spend: teamInfo?.team_info?.spend ?? 0, diff --git a/src/components/dashboard/usage-display.tsx b/src/components/dashboard/usage-display.tsx index cd4fab2..ae25567 100644 --- a/src/components/dashboard/usage-display.tsx +++ b/src/components/dashboard/usage-display.tsx @@ -94,10 +94,20 @@ function UsageChart({ data }: { data: DailyUsage[] }) { /** * Usage display widget. * - * - Customers: don't pass teamId — the backend resolves it from the session. - * - Admins inspecting a specific tenant: pass teamId to override. + * - Customers: don't pass teamId or keyAlias — the backend resolves both + * from the session-bound tenant. + * - Admins inspecting a specific tenant: pass `teamId` (the org-level + * LiteLLM team id) AND `keyAlias` (the tenant's virtual-key alias). + * Without `keyAlias`, the response includes spend from sibling tenants + * in the same org, since teams are shared since Slice 2. 
*/ -export function UsageDisplay({ teamId }: { teamId?: string | null }) { +export function UsageDisplay({ + teamId, + keyAlias, +}: { + teamId?: string | null; + keyAlias?: string | null; +}) { const t = useTranslations("usage"); const [month, setMonth] = useState(getCurrentMonth); const [data, setData] = useState(null); @@ -114,13 +124,16 @@ export function UsageDisplay({ teamId }: { teamId?: string | null }) { if (teamId) { params.set("teamId", teamId); } + if (keyAlias) { + params.set("keyAlias", keyAlias); + } fetch(`/api/usage?${params}`) .then((res) => { if (!res.ok) throw new Error(`${res.status}`); return res.json(); }) .then(setData) .catch((e) => setError(e.message)) .finally(() => setLoading(false)); - }, [teamId, month]); + }, [teamId, keyAlias, month]); useEffect(() => { fetchUsage(); }, [fetchUsage]); diff --git a/src/lib/litellm.ts b/src/lib/litellm.ts index 45c2523..e4bb604 100644 --- a/src/lib/litellm.ts +++ b/src/lib/litellm.ts @@ -91,6 +91,10 @@ export async function getGlobalSpend(): Promise { /** * Fetch per-team spend as a map: teamId → spend (CHF). * Uses /team/list which includes current spend per team. + * + * Since Slice 2, a "team" is the company-level budget shared across all + * tenants of the same ZITADEL org. So this map gives company totals, not + * per-tenant spend. For per-tenant attribution, use {@link getPerKeySpend}. */ export async function getPerTeamSpend(): Promise> { const teams = await listTeams(); @@ -102,3 +106,54 @@ export async function getPerTeamSpend(): Promise> { } return map; } + +/** + * Fetch per-virtual-key spend as a map: keyAlias → spend (CHF). + * + * Since Slice 2, each PiecedTenant CR owns one virtual key under its + * org's team, with `key_alias = tenant.metadata.name`. Filtering by the + * key alias is how we get genuinely per-tenant spend. + * + * Implementation + * -------------- + * Calls `/key/list?return_full_object=true&include_team_keys=true`, + * which returns objects with `spend` and `key_alias`. 
Older LiteLLM + * builds may return raw token strings instead — we degrade gracefully + * to an empty map in that case rather than throwing, since the admin + * health page should still render even if per-tenant numbers are + * temporarily unavailable. + * + * @returns Map. May be empty if the LiteLLM build + * doesn't expose key-alias info; callers must handle that. + */ +export async function getPerKeySpend(): Promise> { + const map = new Map(); + try { + const data = await litellmFetch( + "/key/list?return_full_object=true&include_team_keys=true" + ); + + // Response shape: { keys: [ { key_alias, spend, token, ... } ] } + // or sometimes { data: [...] }, or raw arrays. Be tolerant. + const keys: any[] = Array.isArray(data?.keys) + ? data.keys + : Array.isArray(data?.data) + ? data.data + : Array.isArray(data) + ? data + : []; + + for (const k of keys) { + // Skip raw-string entries from older API shapes — we can't attribute them. + if (typeof k !== "object" || k === null) continue; + const alias = k.key_alias ?? k.keyAlias; + if (typeof alias !== "string" || !alias) continue; + const spend = + typeof k.spend === "number" ? k.spend : Number(k.spend) || 0; + map.set(alias, spend); + } + } catch (e) { + console.warn("getPerKeySpend failed, returning empty map:", e); + } + return map; +} diff --git a/src/types/index.ts b/src/types/index.ts index 8d675de..d34a6a4 100644 --- a/src/types/index.ts +++ b/src/types/index.ts @@ -37,7 +37,18 @@ export interface PiecedTenantStatus { phase: "Pending" | "Provisioning" | "Running" | "Ready" | "Error" | "Deleting"; message?: string; observedGeneration?: number; + /** + * Org-level LiteLLM team id (since Slice 2 — shared across all tenants + * of the same ZITADEL org). For per-tenant spend attribution use + * `litellmKeyAlias`, not this field. + */ litellmTeamId?: string; + /** + * Per-tenant LiteLLM virtual-key alias (set to the CR name). 
Used by + * the portal to filter spend logs to a single tenant within a shared + * org-level team. + */ + litellmKeyAlias?: string; tenantNamespace?: string; enabledPackages?: string[]; conditions?: Array<{