Compare commits

...

3 Commits

Author SHA1 Message Date
d375a099f0 Limit by tenant and org
All checks were successful
Build and Push / build (push) Successful in 1m26s
2026-05-02 23:43:02 +02:00
666dd64580 Budget setting and all dollar to chf
All checks were successful
Build and Push / build (push) Successful in 1m33s
2026-05-02 23:25:24 +02:00
188bef2ece Budget setting and all dollar to chf
All checks were successful
Build and Push / build (push) Successful in 1m28s
2026-05-02 23:16:14 +02:00
9 changed files with 324 additions and 27 deletions

View File

@@ -199,7 +199,7 @@ export default async function TenantDetailPage({
<h2 className="text-xs font-semibold uppercase tracking-wider text-text-muted mb-3"> <h2 className="text-xs font-semibold uppercase tracking-wider text-text-muted mb-3">
{t("usage")} {t("usage")}
</h2> </h2>
<UsageDisplay tenant={name} /> <UsageDisplay tenant={name} canEditBudget={canEdit} />
</section> </section>
{/* Packages */} {/* Packages */}

View File

@@ -0,0 +1,126 @@
import { NextRequest, NextResponse } from "next/server";
import { z } from "zod";
import { getSessionUser, canMutate } from "@/lib/session";
import { getTenant } from "@/lib/k8s";
import { canUserSeeTenant } from "@/lib/visibility";
import { findKeyByAlias, updateKeyBudget } from "@/lib/litellm";
import { safeError } from "@/lib/errors";
/**
* Update the per-tenant budget — operates on the LiteLLM virtual
* key, NOT on the team.
*
* Why per-key
* -----------
* Each tenant in an org has its own virtual key
* (`key_alias = tenant.metadata.name`); the team that owns those
* keys is org-scoped and shared across all the org's tenants. A
* budget on the team would cap the whole org; a budget on the key
* caps just this one tenant. Customers landing on the tenant detail
* page reasonably expect "edit budget" to mean "the budget of THIS
* tenant" — so we put it on the key.
*
* The team-level (org-wide) budget is a separate control that lives
* in /settings (not yet implemented) — the two coexist: LiteLLM
* applies whichever cap is hit first.
*
* Schema:
* - maxBudget: number > 0 (set a cap), or null (remove the cap).
* - budgetDuration: one of "30d", "1mo", "1y", or null (lifetime).
*
* Authorization: owners and platform admins.
*/
// Typo guard: no single tenant cap above one million is accepted.
const MAX_BUDGET_CEILING = 1_000_000;

// A cap must be strictly positive — the original note on this schema
// says LiteLLM rejects 0 and a zero cap would lock the key out at once.
const budgetAmountSchema = z.number().positive().max(MAX_BUDGET_CEILING);

// Reset cadences the portal offers.
const budgetCadenceSchema = z.enum(["30d", "1mo", "1y"]);

/**
 * Request body for the budget PATCH.
 *
 * Both fields are required but nullable: `maxBudget: null` removes the
 * cap, `budgetDuration: null` means the budget never resets.
 */
const patchSchema = z.object({
  maxBudget: budgetAmountSchema.nullable(),
  budgetDuration: budgetCadenceSchema.nullable(),
});
/**
 * PATCH handler: set or remove the budget on a tenant's LiteLLM
 * virtual key.
 *
 * Flow (each step short-circuits with a JSON error response):
 *   1. 401 when there is no session user, 403 when `canMutate` denies.
 *   2. 404 when the tenant does not exist OR is not visible to the
 *      caller — the same response for both so existence is not leaked.
 *   3. 409 when the tenant has no `status.litellmTeamId` yet.
 *   4. 400 when the body is not valid JSON or fails `patchSchema`.
 *   5. 500 when the key lookup throws, 409 when no key matches.
 *   6. 500 when the budget update throws.
 *
 * On success responds with `{ message, maxBudget, budgetDuration }`.
 *
 * @param req - Incoming request; its JSON body is validated against
 *   `patchSchema`.
 * @param params - Route params; `name` is the tenant name and doubles
 *   as the key alias used for the lookup.
 */
export async function PATCH(
  req: NextRequest,
  { params }: { params: Promise<{ name: string }> }
) {
  // Shorthand for the fixed-message error responses.
  const reject = (status: number, error: string) =>
    NextResponse.json({ error }, { status });

  const user = await getSessionUser();
  if (!user) return reject(401, "Unauthorized");
  if (!canMutate(user)) return reject(403, "Forbidden");

  const { name } = await params;
  const tenant = await getTenant(name);
  // Missing and not-visible tenants are indistinguishable on purpose.
  if (!tenant || !(await canUserSeeTenant(user, tenant))) {
    return reject(404, "Not found");
  }

  const teamId = tenant.status?.litellmTeamId;
  if (!teamId) {
    return reject(
      409,
      "Tenant has no LiteLLM team yet. Please wait until provisioning completes."
    );
  }

  // An unparseable body is treated as `null`, which then fails the
  // schema and yields the regular 400 response.
  let payload: unknown;
  try {
    payload = await req.json();
  } catch {
    payload = null;
  }
  const validation = patchSchema.safeParse(payload);
  if (!validation.success) {
    return NextResponse.json(
      { error: "Invalid input", details: validation.error.flatten() },
      { status: 400 }
    );
  }

  // Removing the cap also clears the reset cadence: a cadence without
  // a cap has nothing to reset.
  const { maxBudget } = validation.data;
  let budgetDuration = validation.data.budgetDuration;
  if (maxBudget === null) {
    budgetDuration = null;
  }

  // Resolve the tenant's key by alias (= tenant name); its `token` is
  // what the update call identifies the key by.
  let keyInfo: Awaited<ReturnType<typeof findKeyByAlias>>;
  try {
    keyInfo = await findKeyByAlias(teamId, name);
  } catch (e: any) {
    console.error("Failed to look up tenant key:", e);
    return NextResponse.json(
      { error: safeError(e, "Failed to look up tenant key") },
      { status: 500 }
    );
  }
  if (!keyInfo) {
    return reject(
      409,
      "Tenant has no virtual key yet. Please wait until provisioning completes."
    );
  }

  try {
    await updateKeyBudget(keyInfo.token, { maxBudget, budgetDuration });
    const message = maxBudget === null ? "Budget removed." : "Budget updated.";
    return NextResponse.json({ message, maxBudget, budgetDuration });
  } catch (e: any) {
    console.error("Failed to update key budget:", e);
    return NextResponse.json(
      { error: safeError(e, "Failed to update budget") },
      { status: 500 }
    );
  }
}

View File

@@ -2,7 +2,11 @@ import { NextRequest, NextResponse } from "next/server";
import { getSessionUser } from "@/lib/session"; import { getSessionUser } from "@/lib/session";
import { listTenants } from "@/lib/k8s"; import { listTenants } from "@/lib/k8s";
import { listVisibleTenants } from "@/lib/visibility"; import { listVisibleTenants } from "@/lib/visibility";
import { getTeamInfo, getTeamSpendLogsV2 } from "@/lib/litellm"; import {
getTeamInfo,
getTeamSpendLogsV2,
findKeyByAlias,
} from "@/lib/litellm";
import { safeError } from "@/lib/errors"; import { safeError } from "@/lib/errors";
/** /**
@@ -126,6 +130,16 @@ export async function GET(req: NextRequest) {
try { try {
const teamInfo = await getTeamInfo(teamId); const teamInfo = await getTeamInfo(teamId);
// Per-tenant budget lives on the virtual key, not the team
// (Feature 7 fix). When the request is scoped to a specific
// tenant (keyAlias provided), look up the key so we can return
// the per-tenant cap. Tolerate failure — older LiteLLM builds
// or short-lived race conditions during provisioning shouldn't
// 500 the whole usage page; we degrade to "no key info".
const keyInfo = keyAlias
? await findKeyByAlias(teamId, keyAlias).catch(() => null)
: null;
// Page through results — server-side filtered by key_alias when // Page through results — server-side filtered by key_alias when
// provided. Pagination still needed because LiteLLM caps // provided. Pagination still needed because LiteLLM caps
// page_size at 100, and a busy tenant can easily exceed that in // page_size at 100, and a busy tenant can easily exceed that in
@@ -191,17 +205,38 @@ export async function GET(req: NextRequest) {
totalSpend, totalSpend,
requestCount: allRequests.length, requestCount: allRequests.length,
}, },
// Budget is always team-level (= company budget). Spend reported // Budget reporting (Feature 7).
// here is the team total, not the per-key total — the customer //
// wants to see "how much of our company budget is left", not // When the caller scopes to a specific tenant (keyAlias set),
// just "how much has this one tenant cost". // we report THAT tenant's per-key budget — that's what the
budget: { // tenant detail page renders, and what the customer expects
maxBudget: teamInfo?.team_info?.max_budget ?? null, // when they see "Budget" on a tenant's page.
spend: teamInfo?.team_info?.spend ?? 0, //
remaining: teamInfo?.team_info?.max_budget // When unscoped (admin / org-wide view), we fall back to the
? teamInfo.team_info.max_budget - (teamInfo.team_info.spend ?? 0) // team budget — that's the org-wide cap, conceptually different
: null, // but the only thing meaningful at that scope.
}, //
// The two cases display the same way; the editor button gates
// on whether we know which tenant we're on (= keyAlias set).
budget: keyAlias && keyInfo
? {
maxBudget: keyInfo.maxBudget,
spend: keyInfo.spend,
remaining:
keyInfo.maxBudget !== null
? keyInfo.maxBudget - keyInfo.spend
: null,
budgetDuration: keyInfo.budgetDuration,
}
: {
maxBudget: teamInfo?.team_info?.max_budget ?? null,
spend: teamInfo?.team_info?.spend ?? 0,
remaining: teamInfo?.team_info?.max_budget
? teamInfo.team_info.max_budget -
(teamInfo.team_info.spend ?? 0)
: null,
budgetDuration: teamInfo?.team_info?.budget_duration ?? null,
},
rateLimits: { rateLimits: {
rpm: teamInfo?.team_info?.rpm_limit ?? null, rpm: teamInfo?.team_info?.rpm_limit ?? null,
tpm: teamInfo?.team_info?.tpm_limit ?? null, tpm: teamInfo?.team_info?.tpm_limit ?? null,

View File

@@ -126,12 +126,7 @@ export function BudgetEditableCard({
<> <>
<button <button
type="button" type="button"
onClick={() => { onClick={() => setOpen(true)}
// Temporary debug aid — if clicks reach the handler we'll
// see this in the browser console. Remove once confirmed.
console.log("[BudgetEditableCard] open clicked");
setOpen(true);
}}
className="bg-surface-1 border border-accent/40 rounded-xl p-4 text-left hover:border-accent transition-colors cursor-pointer focus:outline-none focus:ring-2 focus:ring-accent/40 group block w-full" className="bg-surface-1 border border-accent/40 rounded-xl p-4 text-left hover:border-accent transition-colors cursor-pointer focus:outline-none focus:ring-2 focus:ring-accent/40 group block w-full"
> >
<div className="text-xs text-text-muted mb-1 flex items-center justify-between"> <div className="text-xs text-text-muted mb-1 flex items-center justify-between">
@@ -167,12 +162,9 @@ export function BudgetEditableCard({
<h3 className="font-display text-lg font-semibold text-text-primary mb-2"> <h3 className="font-display text-lg font-semibold text-text-primary mb-2">
{t("budgetEditTitle")} {t("budgetEditTitle")}
</h3> </h3>
<p className="text-sm text-text-secondary mb-4"> <p className="text-sm text-text-secondary mb-5">
{t("budgetEditDescription")} {t("budgetEditDescription")}
</p> </p>
<div className="text-xs text-amber-400 bg-amber-400/10 border border-amber-400/20 rounded-lg px-3 py-2 mb-5">
{t("budgetOrgScopeWarning")}
</div>
<form onSubmit={onSubmit} className="space-y-4"> <form onSubmit={onSubmit} className="space-y-4">
{/* Mode toggle: unlimited vs capped. Two radios are {/* Mode toggle: unlimited vs capped. Two radios are

View File

@@ -93,6 +93,94 @@ export async function listTeams(): Promise<any[]> {
return Array.isArray(data) ? data : data?.data ?? data?.teams ?? []; return Array.isArray(data) ? data : data?.data ?? data?.teams ?? [];
} }
/**
 * Find a virtual key on a team by its alias and return its current
 * state (token, spend, budget cap, reset cadence).
 *
 * Why we need this
 * ----------------
 * Per-tenant budgets live on the virtual key, not the team. The
 * portal needs the key's `max_budget` / `budget_duration` / `spend`
 * for display, and its `token` to identify the key when updating the
 * budget.
 *
 * Lookup strategy
 * ---------------
 * `/key/list` is paginated (per the LiteLLM API docs the default page
 * size is small), so reading only the first page can miss a key on a
 * team with many tenants. We therefore:
 *   - ask the server to filter by `key_alias` (servers that do not
 *     know the parameter are expected to ignore it — TODO confirm
 *     against the oldest deployed LiteLLM build),
 *   - request the largest page size (100), and
 *   - keep following `total_pages` when the response reports it.
 * The exact alias comparison is still done client-side, so the result
 * never depends on how the server interprets the filter.
 *
 * @param teamId - LiteLLM team that owns the key.
 * @param keyAlias - Alias to match exactly (the tenant name).
 * @returns The matching key's state, or `null` when no key on the
 *   team has that alias (or the match has no usable `token`).
 * @throws Whatever `litellmFetch` throws on a failed request.
 */
export async function findKeyByAlias(
  teamId: string,
  keyAlias: string
): Promise<{
  token: string;
  spend: number;
  maxBudget: number | null;
  budgetDuration: string | null;
} | null> {
  const PAGE_SIZE = 100;
  // Hard stop so a misbehaving server can never keep us looping.
  const MAX_PAGES = 100;

  const baseQuery =
    `team_id=${encodeURIComponent(teamId)}` +
    `&return_full_object=true&include_team_keys=true` +
    `&key_alias=${encodeURIComponent(keyAlias)}` +
    `&size=${PAGE_SIZE}`;

  for (let page = 1; page <= MAX_PAGES; page++) {
    const data = await litellmFetch(`/key/list?${baseQuery}&page=${page}`);

    // Tolerate the response shapes the original code accepted:
    // `{ keys: [...] }`, `{ data: [...] }`, or a bare array.
    const keys: unknown[] = Array.isArray(data?.keys)
      ? data.keys
      : Array.isArray(data?.data)
        ? data.data
        : Array.isArray(data)
          ? data
          : [];

    for (const entry of keys) {
      if (typeof entry !== "object" || entry === null) continue;
      const k = entry as Record<string, unknown>;

      const alias = k.key_alias ?? k.keyAlias;
      if (alias !== keyAlias) continue;
      if (typeof k.token !== "string" || !k.token) continue;

      return {
        token: k.token,
        spend: typeof k.spend === "number" ? k.spend : Number(k.spend) || 0,
        maxBudget:
          typeof k.max_budget === "number"
            ? k.max_budget
            : k.max_budget == null
              ? null
              : Number(k.max_budget) || null,
        budgetDuration:
          typeof k.budget_duration === "string" ? k.budget_duration : null,
      };
    }

    // Continue only when the server explicitly reports more pages.
    // Responses without pagination metadata are treated as complete,
    // which matches the original single-request behavior.
    const totalPages = Number(data?.total_pages);
    if (keys.length === 0) break;
    if (!Number.isFinite(totalPages) || page >= totalPages) break;
  }

  return null;
}
/**
 * Change the budget cap and/or reset cadence of a virtual key via
 * `POST /key/update`.
 *
 * Only fields that are present on `changes` are sent:
 *   - `maxBudget: null` removes the cap; a number sets it.
 *   - `budgetDuration: null` makes the budget never reset.
 *   - a field left `undefined` is omitted from the request entirely.
 *
 * @param key - Identifier sent as-is in the `key` field. The portal
 *   flow passes the `token` returned by `findKeyByAlias`.
 * @param changes - Budget fields to update.
 * @throws Whatever `litellmFetch` throws on a failed request.
 */
export async function updateKeyBudget(
  key: string,
  changes: {
    maxBudget?: number | null;
    budgetDuration?: string | null;
  }
): Promise<void> {
  const { maxBudget, budgetDuration } = changes;

  // Conditional spreads keep `undefined` fields out of the payload
  // while still letting an explicit `null` through.
  const payload: Record<string, unknown> = {
    key,
    ...(maxBudget !== undefined ? { max_budget: maxBudget } : {}),
    ...(budgetDuration !== undefined
      ? { budget_duration: budgetDuration }
      : {}),
  };

  await litellmFetch("/key/update", {
    method: "POST",
    body: JSON.stringify(payload),
  });
}
/** /**
* Get LiteLLM health status. * Get LiteLLM health status.
*/ */

View File

@@ -189,7 +189,21 @@
"last30Days": "Letzte 30 Tage", "last30Days": "Letzte 30 Tage",
"noData": "Keine Nutzungsdaten verfügbar.", "noData": "Keine Nutzungsdaten verfügbar.",
"dailyBreakdown": "Tagesübersicht", "dailyBreakdown": "Tagesübersicht",
"requests": "Anfragen" "requests": "Anfragen",
"budgetEdit": "Bearbeiten",
"budgetEditTitle": "Budget festlegen",
"budgetEditDescription": "Begrenzen Sie, wie viel die Assistenten dieses Tenants ausgeben können, bevor Anfragen abgelehnt werden.",
"budgetModeUnlimited": "Kein Limit",
"budgetModeUnlimitedDescription": "Beliebige Ausgaben, kein Limit.",
"budgetModeCapped": "Limit festlegen",
"budgetModeCappedDescription": "Anfragen ablehnen, sobald die Ausgaben diesen Betrag erreichen.",
"budgetAmount": "Betrag",
"budgetResetCadence": "Zurücksetzen",
"budgetCadence_30d": "Alle 30 Tage",
"budgetCadence_1mo": "Monatlich",
"budgetCadence_1y": "Jährlich",
"budgetInvalid": "Bitte einen positiven Betrag eingeben.",
"budgetSaveFailed": "Budget konnte nicht gespeichert werden. Bitte erneut versuchen."
}, },
"workspace": { "workspace": {
"save": "Speichern", "save": "Speichern",

View File

@@ -189,7 +189,21 @@
"last30Days": "Last 30 Days", "last30Days": "Last 30 Days",
"noData": "No usage data available.", "noData": "No usage data available.",
"dailyBreakdown": "Daily Breakdown", "dailyBreakdown": "Daily Breakdown",
"requests": "requests" "requests": "requests",
"budgetEdit": "Edit",
"budgetEditTitle": "Set spending budget",
"budgetEditDescription": "Cap how much this tenant's assistants can spend before requests start being declined.",
"budgetModeUnlimited": "No limit",
"budgetModeUnlimitedDescription": "Spend as much as needed; no cap.",
"budgetModeCapped": "Set a cap",
"budgetModeCappedDescription": "Stop accepting requests once spend reaches this amount.",
"budgetAmount": "Amount",
"budgetResetCadence": "Reset",
"budgetCadence_30d": "Every 30 days",
"budgetCadence_1mo": "Monthly",
"budgetCadence_1y": "Yearly",
"budgetInvalid": "Please enter a positive amount.",
"budgetSaveFailed": "Could not save budget. Please try again."
}, },
"workspace": { "workspace": {
"save": "Save", "save": "Save",

View File

@@ -189,7 +189,21 @@
"last30Days": "30 derniers jours", "last30Days": "30 derniers jours",
"noData": "Aucune donnée d'utilisation disponible.", "noData": "Aucune donnée d'utilisation disponible.",
"dailyBreakdown": "Détail journalier", "dailyBreakdown": "Détail journalier",
"requests": "requêtes" "requests": "requêtes",
"budgetEdit": "Modifier",
"budgetEditTitle": "Définir un budget",
"budgetEditDescription": "Limitez la dépense des assistants de ce locataire avant que les requêtes ne soient refusées.",
"budgetModeUnlimited": "Aucune limite",
"budgetModeUnlimitedDescription": "Dépense libre, sans plafond.",
"budgetModeCapped": "Définir un plafond",
"budgetModeCappedDescription": "Refuser les requêtes une fois ce montant atteint.",
"budgetAmount": "Montant",
"budgetResetCadence": "Réinitialisation",
"budgetCadence_30d": "Tous les 30 jours",
"budgetCadence_1mo": "Mensuelle",
"budgetCadence_1y": "Annuelle",
"budgetInvalid": "Veuillez saisir un montant positif.",
"budgetSaveFailed": "Impossible d'enregistrer le budget. Veuillez réessayer."
}, },
"workspace": { "workspace": {
"save": "Enregistrer", "save": "Enregistrer",

View File

@@ -189,7 +189,21 @@
"last30Days": "Ultimi 30 giorni", "last30Days": "Ultimi 30 giorni",
"noData": "Nessun dato di utilizzo disponibile.", "noData": "Nessun dato di utilizzo disponibile.",
"dailyBreakdown": "Dettaglio giornaliero", "dailyBreakdown": "Dettaglio giornaliero",
"requests": "richieste" "requests": "richieste",
"budgetEdit": "Modifica",
"budgetEditTitle": "Imposta budget",
"budgetEditDescription": "Limita quanto gli assistenti di questo tenant possono spendere prima che le richieste vengano rifiutate.",
"budgetModeUnlimited": "Nessun limite",
"budgetModeUnlimitedDescription": "Spesa libera, nessun tetto.",
"budgetModeCapped": "Imposta un tetto",
"budgetModeCappedDescription": "Rifiuta le richieste una volta raggiunto questo importo.",
"budgetAmount": "Importo",
"budgetResetCadence": "Ripristino",
"budgetCadence_30d": "Ogni 30 giorni",
"budgetCadence_1mo": "Mensile",
"budgetCadence_1y": "Annuale",
"budgetInvalid": "Inserisci un importo positivo.",
"budgetSaveFailed": "Impossibile salvare il budget. Riprova."
}, },
"workspace": { "workspace": {
"save": "Salva", "save": "Salva",