From d375a099f09f57042d35e2ca534b0486a11f63aa Mon Sep 17 00:00:00 2001 From: admin Date: Sat, 2 May 2026 23:43:02 +0200 Subject: [PATCH] Limit by tenant and org --- src/app/api/tenants/[name]/budget/route.ts | 126 ++++++++++++++++++ src/app/api/usage/route.ts | 59 ++++++-- .../dashboard/budget-editable-card.tsx | 5 +- src/lib/litellm.ts | 88 ++++++++++++ src/messages/de.json | 3 +- src/messages/en.json | 3 +- src/messages/fr.json | 3 +- src/messages/it.json | 3 +- 8 files changed, 266 insertions(+), 24 deletions(-) create mode 100644 src/app/api/tenants/[name]/budget/route.ts diff --git a/src/app/api/tenants/[name]/budget/route.ts b/src/app/api/tenants/[name]/budget/route.ts new file mode 100644 index 0000000..0c77b5a --- /dev/null +++ b/src/app/api/tenants/[name]/budget/route.ts @@ -0,0 +1,126 @@ +import { NextRequest, NextResponse } from "next/server"; +import { z } from "zod"; +import { getSessionUser, canMutate } from "@/lib/session"; +import { getTenant } from "@/lib/k8s"; +import { canUserSeeTenant } from "@/lib/visibility"; +import { findKeyByAlias, updateKeyBudget } from "@/lib/litellm"; +import { safeError } from "@/lib/errors"; + +/** + * Update the per-tenant budget — operates on the LiteLLM virtual + * key, NOT on the team. + * + * Why per-key + * ----------- + * Each tenant in an org has its own virtual key + * (`key_alias = tenant.metadata.name`); the team that owns those + * keys is org-scoped and shared across all the org's tenants. A + * budget on the team would cap the whole org; a budget on the key + * caps just this one tenant. Customers landing on the tenant detail + * page reasonably expect "edit budget" to mean "the budget of THIS + * tenant" — so we put it on the key. + * + * The team-level (org-wide) budget is a separate control that lives + * in /settings (not yet implemented) — the two coexist: LiteLLM + * applies whichever cap is hit first. + * + * Schema: + * - maxBudget: number > 0 (set a cap), or null (remove the cap). 
+ *   - budgetDuration: one of "30d", "1mo", "1y", or null (lifetime).
+ *
+ * Authorization: owners and platform admins (checked via canMutate + canUserSeeTenant).
+ */
+
+const patchSchema = z.object({
+  // > 0 because LiteLLM rejects 0 and a zero cap would lock the key
+  // out instantly. Upper bound 1M as a typo guard.
+  maxBudget: z.number().positive().max(1_000_000).nullable(),
+  budgetDuration: z.enum(["30d", "1mo", "1y"]).nullable(),
+});
+
+export async function PATCH(
+  req: NextRequest,
+  { params }: { params: Promise<{ name: string }> }
+) {
+  const user = await getSessionUser();
+  if (!user) {
+    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
+  }
+  if (!canMutate(user)) {
+    return NextResponse.json({ error: "Forbidden" }, { status: 403 });
+  }
+
+  const { name } = await params;
+  const tenant = await getTenant(name);
+  if (!tenant) {
+    return NextResponse.json({ error: "Not found" }, { status: 404 });
+  }
+  if (!(await canUserSeeTenant(user, tenant))) {
+    // Don't leak existence — answer exactly like the "tenant not found" case above.
+    return NextResponse.json({ error: "Not found" }, { status: 404 });
+  }
+
+  const teamId = tenant.status?.litellmTeamId;
+  if (!teamId) {
+    return NextResponse.json(
+      {
+        error:
+          "Tenant has no LiteLLM team yet. Please wait until provisioning completes.",
+      },
+      { status: 409 }
+    );
+  }
+
+  const body = await req.json().catch(() => null); // unparsable JSON becomes null, which the schema check below rejects with 400
+  const parsed = patchSchema.safeParse(body);
+  if (!parsed.success) {
+    return NextResponse.json(
+      { error: "Invalid input", details: parsed.error.flatten() },
+      { status: 400 }
+    );
+  }
+
+  // Defensive: removing the cap should null out the duration too —
+  // a reset cadence on an unlimited budget is meaningless and would
+  // confuse LiteLLM's bookkeeping.
+  const maxBudget = parsed.data.maxBudget;
+  const budgetDuration =
+    maxBudget === null ? null : parsed.data.budgetDuration;
+
+  // Look up the key by alias (= tenant name). The token returned is
+  // what /key/update wants in the `key` field.
+  let keyInfo;
+  try {
+    keyInfo = await findKeyByAlias(teamId, name);
+  } catch (e: any) {
+    console.error("Failed to look up tenant key:", e);
+    return NextResponse.json(
+      { error: safeError(e, "Failed to look up tenant key") },
+      { status: 500 }
+    );
+  }
+  if (!keyInfo) {
+    return NextResponse.json(
+      {
+        error:
+          "Tenant has no virtual key yet. Please wait until provisioning completes.",
+      },
+      { status: 409 }
+    );
+  }
+
+  try {
+    await updateKeyBudget(keyInfo.token, { maxBudget, budgetDuration });
+    return NextResponse.json({
+      message: maxBudget === null ? "Budget removed." : "Budget updated.",
+      maxBudget,
+      budgetDuration,
+    });
+  } catch (e: any) {
+    console.error("Failed to update key budget:", e);
+    return NextResponse.json(
+      { error: safeError(e, "Failed to update budget") },
+      { status: 500 }
+    );
+  }
+}
diff --git a/src/app/api/usage/route.ts b/src/app/api/usage/route.ts
index 96cb65d..a3f1ad1 100644
--- a/src/app/api/usage/route.ts
+++ b/src/app/api/usage/route.ts
@@ -2,7 +2,11 @@ import { NextRequest, NextResponse } from "next/server";
 import { getSessionUser } from "@/lib/session";
 import { listTenants } from "@/lib/k8s";
 import { listVisibleTenants } from "@/lib/visibility";
-import { getTeamInfo, getTeamSpendLogsV2 } from "@/lib/litellm";
+import {
+  getTeamInfo,
+  getTeamSpendLogsV2,
+  findKeyByAlias,
+} from "@/lib/litellm";
 import { safeError } from "@/lib/errors";
 
 /**
@@ -126,6 +130,16 @@ export async function GET(req: NextRequest) {
   try {
     const teamInfo = await getTeamInfo(teamId);
 
+    // Per-tenant budget lives on the virtual key, not the team
+    // (Feature 7 fix). When the request is scoped to a specific
+    // tenant (keyAlias provided), look up the key so we can return
+    // the per-tenant cap. Tolerate failure — older LiteLLM builds
+    // or short-lived race conditions during provisioning shouldn't
+    // 500 the whole usage page; we degrade to "no key info".
+    const keyInfo = keyAlias
+      ?
await findKeyByAlias(teamId, keyAlias).catch(() => null) + : null; + // Page through results — server-side filtered by key_alias when // provided. Pagination still needed because LiteLLM caps // page_size at 100, and a busy tenant can easily exceed that in @@ -191,17 +205,38 @@ export async function GET(req: NextRequest) { totalSpend, requestCount: allRequests.length, }, - // Budget is always team-level (= company budget). Spend reported - // here is the team total, not the per-key total — the customer - // wants to see "how much of our company budget is left", not - // just "how much has this one tenant cost". - budget: { - maxBudget: teamInfo?.team_info?.max_budget ?? null, - spend: teamInfo?.team_info?.spend ?? 0, - remaining: teamInfo?.team_info?.max_budget - ? teamInfo.team_info.max_budget - (teamInfo.team_info.spend ?? 0) - : null, - }, + // Budget reporting (Feature 7). + // + // When the caller scopes to a specific tenant (keyAlias set), + // we report THAT tenant's per-key budget — that's what the + // tenant detail page renders, and what the customer expects + // when they see "Budget" on a tenant's page. + // + // When unscoped (admin / org-wide view), we fall back to the + // team budget — that's the org-wide cap, conceptually different + // but the only thing meaningful at that scope. + // + // The two cases display the same way; the editor button gates + // on whether we know which tenant we're on (= keyAlias set). + budget: keyAlias && keyInfo + ? { + maxBudget: keyInfo.maxBudget, + spend: keyInfo.spend, + remaining: + keyInfo.maxBudget !== null + ? keyInfo.maxBudget - keyInfo.spend + : null, + budgetDuration: keyInfo.budgetDuration, + } + : { + maxBudget: teamInfo?.team_info?.max_budget ?? null, + spend: teamInfo?.team_info?.spend ?? 0, + remaining: teamInfo?.team_info?.max_budget + ? teamInfo.team_info.max_budget - + (teamInfo.team_info.spend ?? 0) + : null, + budgetDuration: teamInfo?.team_info?.budget_duration ?? 
null, + }, rateLimits: { rpm: teamInfo?.team_info?.rpm_limit ?? null, tpm: teamInfo?.team_info?.tpm_limit ?? null, diff --git a/src/components/dashboard/budget-editable-card.tsx b/src/components/dashboard/budget-editable-card.tsx index 41553bf..7089eae 100644 --- a/src/components/dashboard/budget-editable-card.tsx +++ b/src/components/dashboard/budget-editable-card.tsx @@ -162,12 +162,9 @@ export function BudgetEditableCard({

{t("budgetEditTitle")}

-

+

{t("budgetEditDescription")}

-
- {t("budgetOrgScopeWarning")} -
{/* Mode toggle: unlimited vs capped. Two radios are
diff --git a/src/lib/litellm.ts b/src/lib/litellm.ts
index 104f42d..d55d8b9 100644
--- a/src/lib/litellm.ts
+++ b/src/lib/litellm.ts
@@ -93,6 +93,94 @@ export async function listTeams(): Promise {
   return Array.isArray(data) ? data : data?.data ?? data?.teams ?? [];
 }
 
+/**
+ * Find a virtual key on a team by its alias and return its current
+ * state (token, spend, budget cap, reset cadence). Returns null if
+ * the alias doesn't match any key on the team.
+ *
+ * Why we need this
+ * ----------------
+ * Per-tenant budgets live on the virtual key, not the team. The
+ * portal needs the key's `max_budget` / `budget_duration` / `spend`
+ * for the tenant detail page, and its `token` for `/key/update`.
+ *
+ * The token is opaque to the customer; the operator's
+ * `FindKeyByAlias` does a similar lookup for stale-key cleanup.
+ *
+ * `/key/list` is paginated, so we pass `key_alias` + `size=100` and
+ * still re-check the alias locally in case the filter is ignored.
+ * Errors thrown by `litellmFetch` propagate to the caller.
+ */
+export async function findKeyByAlias(
+  teamId: string,
+  keyAlias: string
+): Promise<{
+  token: string;
+  spend: number;
+  maxBudget: number | null;
+  budgetDuration: string | null;
+} | null> {
+  const data = await litellmFetch(
+    `/key/list?team_id=${encodeURIComponent(teamId)}&key_alias=${encodeURIComponent(keyAlias)}&size=100&return_full_object=true&include_team_keys=true`
+  );
+  const keys: any[] = Array.isArray(data?.keys)
+    ? data.keys
+    : Array.isArray(data?.data)
+      ? data.data
+      : Array.isArray(data)
+        ? data
+        : [];
+  for (const k of keys) {
+    if (typeof k !== "object" || k === null) continue;
+    const alias = k.key_alias ?? k.keyAlias;
+    if (alias !== keyAlias) continue;
+    if (typeof k.token !== "string" || !k.token) continue;
+    return {
+      token: k.token,
+      spend: typeof k.spend === "number" ? k.spend : Number(k.spend) || 0,
+      maxBudget:
+        typeof k.max_budget === "number"
+          ? k.max_budget
+          : k.max_budget == null
+            ? null
+            : Number(k.max_budget) || null,
+      budgetDuration:
+        typeof k.budget_duration === "string" ? k.budget_duration : null,
+    };
+  }
+  return null;
+}
+
+/**
+ * Update a virtual key's budget cap and reset duration.
+ *
+ * Pass `maxBudget: null` to remove the cap. Pass `budgetDuration:
+ * null` to make the budget never reset (lifetime cap).
+ *
+ * Identified by `key` parameter — accepts either the raw `sk-...`
+ * token or its hash (LiteLLM accepts both shapes on /key/update).
+ * The portal flow uses the hash returned by `findKeyByAlias`.
+ */
+export async function updateKeyBudget(
+  key: string,
+  changes: {
+    maxBudget?: number | null;
+    budgetDuration?: string | null;
+  }
+): Promise<void> {
+  const body: Record<string, unknown> = { key };
+  if (changes.maxBudget !== undefined) {
+    body.max_budget = changes.maxBudget;
+  }
+  if (changes.budgetDuration !== undefined) {
+    body.budget_duration = changes.budgetDuration;
+  }
+  await litellmFetch("/key/update", {
+    method: "POST",
+    body: JSON.stringify(body),
+  });
+}
+
 /**
  * Get LiteLLM health status.
  */
diff --git a/src/messages/de.json b/src/messages/de.json
index b5a4578..3b326f2 100644
--- a/src/messages/de.json
+++ b/src/messages/de.json
@@ -192,8 +192,7 @@
     "requests": "Anfragen",
     "budgetEdit": "Bearbeiten",
     "budgetEditTitle": "Budget festlegen",
-    "budgetEditDescription": "Begrenzen Sie, wie viel Ihre Assistenten ausgeben können, bevor Anfragen abgelehnt werden.",
-    "budgetOrgScopeWarning": "Dieses Budget gilt für alle Tenants Ihrer Organisation, nicht nur für diesen.
Bei mehreren Tenants teilen sich diese das Limit.", + "budgetEditDescription": "Begrenzen Sie, wie viel die Assistenten dieses Tenants ausgeben können, bevor Anfragen abgelehnt werden.", "budgetModeUnlimited": "Kein Limit", "budgetModeUnlimitedDescription": "Beliebige Ausgaben, kein Limit.", "budgetModeCapped": "Limit festlegen", diff --git a/src/messages/en.json b/src/messages/en.json index 3a411b5..60fceaf 100644 --- a/src/messages/en.json +++ b/src/messages/en.json @@ -192,8 +192,7 @@ "requests": "requests", "budgetEdit": "Edit", "budgetEditTitle": "Set spending budget", - "budgetEditDescription": "Cap how much your assistants can spend before requests start being declined.", - "budgetOrgScopeWarning": "This budget applies to all tenants in your organization, not just this one. If you have multiple tenants, they share the same cap.", + "budgetEditDescription": "Cap how much this tenant's assistants can spend before requests start being declined.", "budgetModeUnlimited": "No limit", "budgetModeUnlimitedDescription": "Spend as much as needed; no cap.", "budgetModeCapped": "Set a cap", diff --git a/src/messages/fr.json b/src/messages/fr.json index 24f8243..1ed411e 100644 --- a/src/messages/fr.json +++ b/src/messages/fr.json @@ -192,8 +192,7 @@ "requests": "requêtes", "budgetEdit": "Modifier", "budgetEditTitle": "Définir un budget", - "budgetEditDescription": "Limitez la dépense de vos assistants avant que les requêtes ne soient refusées.", - "budgetOrgScopeWarning": "Ce budget s'applique à tous les locataires de votre organisation, pas seulement à celui-ci. 
Si vous avez plusieurs locataires, ils partagent le même plafond.", + "budgetEditDescription": "Limitez la dépense des assistants de ce locataire avant que les requêtes ne soient refusées.", "budgetModeUnlimited": "Aucune limite", "budgetModeUnlimitedDescription": "Dépense libre, sans plafond.", "budgetModeCapped": "Définir un plafond", diff --git a/src/messages/it.json b/src/messages/it.json index e122ac1..31004c6 100644 --- a/src/messages/it.json +++ b/src/messages/it.json @@ -192,8 +192,7 @@ "requests": "richieste", "budgetEdit": "Modifica", "budgetEditTitle": "Imposta budget", - "budgetEditDescription": "Limita quanto i tuoi assistenti possono spendere prima che le richieste vengano rifiutate.", - "budgetOrgScopeWarning": "Questo budget si applica a tutti i tenant della tua organizzazione, non solo a questo. Se hai più tenant, condividono lo stesso limite.", + "budgetEditDescription": "Limita quanto gli assistenti di questo tenant possono spendere prima che le richieste vengano rifiutate.", "budgetModeUnlimited": "Nessun limite", "budgetModeUnlimitedDescription": "Spesa libera, nessun tetto.", "budgetModeCapped": "Imposta un tetto",