Limit by tenant and org
This commit is contained in:
126
src/app/api/tenants/[name]/budget/route.ts
Normal file
126
src/app/api/tenants/[name]/budget/route.ts
Normal file
@@ -0,0 +1,126 @@
|
||||
import { NextRequest, NextResponse } from "next/server";
|
||||
import { z } from "zod";
|
||||
import { getSessionUser, canMutate } from "@/lib/session";
|
||||
import { getTenant } from "@/lib/k8s";
|
||||
import { canUserSeeTenant } from "@/lib/visibility";
|
||||
import { findKeyByAlias, updateKeyBudget } from "@/lib/litellm";
|
||||
import { safeError } from "@/lib/errors";
|
||||
|
||||
/**
|
||||
* Update the per-tenant budget — operates on the LiteLLM virtual
|
||||
* key, NOT on the team.
|
||||
*
|
||||
* Why per-key
|
||||
* -----------
|
||||
* Each tenant in an org has its own virtual key
|
||||
* (`key_alias = tenant.metadata.name`); the team that owns those
|
||||
* keys is org-scoped and shared across all the org's tenants. A
|
||||
* budget on the team would cap the whole org; a budget on the key
|
||||
* caps just this one tenant. Customers landing on the tenant detail
|
||||
* page reasonably expect "edit budget" to mean "the budget of THIS
|
||||
* tenant" — so we put it on the key.
|
||||
*
|
||||
* The team-level (org-wide) budget is a separate control that lives
|
||||
* in /settings (not yet implemented) — the two coexist: LiteLLM
|
||||
* applies whichever cap is hit first.
|
||||
*
|
||||
* Schema:
|
||||
* - maxBudget: number > 0 (set a cap), or null (remove the cap).
|
||||
* - budgetDuration: one of "30d", "1mo", "1y", or null (lifetime).
|
||||
*
|
||||
* Authorization: owners and platform admins.
|
||||
*/
|
||||
|
||||
// Reset cadences the client may submit. `null` is also accepted by the
// schema below and stands for a lifetime (never-resetting) budget.
const BUDGET_DURATION_CHOICES = ["30d", "1mo", "1y"] as const;

// Upper bound for a cap; exists purely as a typo guard.
const MAX_BUDGET_CEILING = 1_000_000;

/**
 * Shape of the PATCH request body.
 *
 *   - maxBudget: a number in (0, 1_000_000], or null to remove the cap.
 *     Zero is excluded because LiteLLM rejects 0 and a zero cap would
 *     lock the key out instantly.
 *   - budgetDuration: "30d" | "1mo" | "1y", or null.
 *
 * Both fields are required (nullable, not optional).
 */
const patchSchema = z.object({
  maxBudget: z.number().positive().max(MAX_BUDGET_CEILING).nullable(),
  budgetDuration: z.enum(BUDGET_DURATION_CHOICES).nullable(),
});
|
||||
|
||||
/**
 * PATCH handler: checks the caller, validates the payload against
 * `patchSchema`, then writes the new budget cap / reset cadence onto
 * the virtual key whose alias equals the tenant name.
 *
 * Responses:
 *   401 — no session user.
 *   403 — `canMutate(user)` is false.
 *   404 — tenant not found, or not visible to the caller.
 *   409 — tenant has no LiteLLM team id, or no matching virtual key.
 *   400 — body is not valid JSON or fails `patchSchema`.
 *   500 — the key lookup or the key update threw.
 *   200 — `{ message, maxBudget, budgetDuration }` with the applied values.
 */
export async function PATCH(
  req: NextRequest,
  { params }: { params: Promise<{ name: string }> }
) {
  // Most failure paths share the same `{ error }` + status shape.
  const fail = <T>(status: number, error: T) =>
    NextResponse.json({ error }, { status });

  const user = await getSessionUser();
  if (!user) return fail(401, "Unauthorized");
  if (!canMutate(user)) return fail(403, "Forbidden");

  const { name } = await params;
  const tenant = await getTenant(name);
  // A missing tenant and a non-visible one answer identically so the
  // response does not leak whether the tenant exists. The visibility
  // check only runs when the tenant was found (short-circuit).
  if (!tenant || !(await canUserSeeTenant(user, tenant))) {
    return fail(404, "Not found");
  }

  const teamId = tenant.status?.litellmTeamId;
  if (!teamId) {
    return fail(
      409,
      "Tenant has no LiteLLM team yet. Please wait until provisioning completes."
    );
  }

  // A body that is not valid JSON collapses to `null`, which then fails
  // schema validation and produces the 400 below.
  const payload = await req.json().catch(() => null);
  const validation = patchSchema.safeParse(payload);
  if (!validation.success) {
    return NextResponse.json(
      { error: "Invalid input", details: validation.error.flatten() },
      { status: 400 }
    );
  }

  // Removing the cap also clears the reset cadence: a cadence on an
  // unlimited budget carries no meaning, so it is never forwarded.
  const { maxBudget, budgetDuration: requestedDuration } = validation.data;
  const budgetDuration = maxBudget === null ? null : requestedDuration;

  // Resolve the tenant's key via its alias (= tenant name); the token
  // that comes back identifies the key in the update call.
  let tenantKey;
  try {
    tenantKey = await findKeyByAlias(teamId, name);
  } catch (e: any) {
    console.error("Failed to look up tenant key:", e);
    return fail(500, safeError(e, "Failed to look up tenant key"));
  }
  if (!tenantKey) {
    return fail(
      409,
      "Tenant has no virtual key yet. Please wait until provisioning completes."
    );
  }

  try {
    await updateKeyBudget(tenantKey.token, { maxBudget, budgetDuration });
    const message = maxBudget === null ? "Budget removed." : "Budget updated.";
    return NextResponse.json({ message, maxBudget, budgetDuration });
  } catch (e: any) {
    console.error("Failed to update key budget:", e);
    return fail(500, safeError(e, "Failed to update budget"));
  }
}
|
||||
@@ -2,7 +2,11 @@ import { NextRequest, NextResponse } from "next/server";
|
||||
import { getSessionUser } from "@/lib/session";
|
||||
import { listTenants } from "@/lib/k8s";
|
||||
import { listVisibleTenants } from "@/lib/visibility";
|
||||
import { getTeamInfo, getTeamSpendLogsV2 } from "@/lib/litellm";
|
||||
import {
|
||||
getTeamInfo,
|
||||
getTeamSpendLogsV2,
|
||||
findKeyByAlias,
|
||||
} from "@/lib/litellm";
|
||||
import { safeError } from "@/lib/errors";
|
||||
|
||||
/**
|
||||
@@ -126,6 +130,16 @@ export async function GET(req: NextRequest) {
|
||||
try {
|
||||
const teamInfo = await getTeamInfo(teamId);
|
||||
|
||||
// Per-tenant budget lives on the virtual key, not the team
|
||||
// (Feature 7 fix). When the request is scoped to a specific
|
||||
// tenant (keyAlias provided), look up the key so we can return
|
||||
// the per-tenant cap. Tolerate failure — older LiteLLM builds
|
||||
// or short-lived race conditions during provisioning shouldn't
|
||||
// 500 the whole usage page; we degrade to "no key info".
|
||||
const keyInfo = keyAlias
|
||||
? await findKeyByAlias(teamId, keyAlias).catch(() => null)
|
||||
: null;
|
||||
|
||||
// Page through results — server-side filtered by key_alias when
|
||||
// provided. Pagination still needed because LiteLLM caps
|
||||
// page_size at 100, and a busy tenant can easily exceed that in
|
||||
@@ -191,17 +205,38 @@ export async function GET(req: NextRequest) {
|
||||
totalSpend,
|
||||
requestCount: allRequests.length,
|
||||
},
|
||||
// Budget is always team-level (= company budget). Spend reported
|
||||
// here is the team total, not the per-key total — the customer
|
||||
// wants to see "how much of our company budget is left", not
|
||||
// just "how much has this one tenant cost".
|
||||
budget: {
|
||||
maxBudget: teamInfo?.team_info?.max_budget ?? null,
|
||||
spend: teamInfo?.team_info?.spend ?? 0,
|
||||
remaining: teamInfo?.team_info?.max_budget
|
||||
? teamInfo.team_info.max_budget - (teamInfo.team_info.spend ?? 0)
|
||||
: null,
|
||||
},
|
||||
// Budget reporting (Feature 7).
|
||||
//
|
||||
// When the caller scopes to a specific tenant (keyAlias set),
|
||||
// we report THAT tenant's per-key budget — that's what the
|
||||
// tenant detail page renders, and what the customer expects
|
||||
// when they see "Budget" on a tenant's page.
|
||||
//
|
||||
// When unscoped (admin / org-wide view), we fall back to the
|
||||
// team budget — that's the org-wide cap, conceptually different
|
||||
// but the only thing meaningful at that scope.
|
||||
//
|
||||
// The two cases display the same way; the editor button gates
|
||||
// on whether we know which tenant we're on (= keyAlias set).
|
||||
budget: keyAlias && keyInfo
|
||||
? {
|
||||
maxBudget: keyInfo.maxBudget,
|
||||
spend: keyInfo.spend,
|
||||
remaining:
|
||||
keyInfo.maxBudget !== null
|
||||
? keyInfo.maxBudget - keyInfo.spend
|
||||
: null,
|
||||
budgetDuration: keyInfo.budgetDuration,
|
||||
}
|
||||
: {
|
||||
maxBudget: teamInfo?.team_info?.max_budget ?? null,
|
||||
spend: teamInfo?.team_info?.spend ?? 0,
|
||||
remaining: teamInfo?.team_info?.max_budget
|
||||
? teamInfo.team_info.max_budget -
|
||||
(teamInfo.team_info.spend ?? 0)
|
||||
: null,
|
||||
budgetDuration: teamInfo?.team_info?.budget_duration ?? null,
|
||||
},
|
||||
rateLimits: {
|
||||
rpm: teamInfo?.team_info?.rpm_limit ?? null,
|
||||
tpm: teamInfo?.team_info?.tpm_limit ?? null,
|
||||
|
||||
@@ -162,12 +162,9 @@ export function BudgetEditableCard({
|
||||
<h3 className="font-display text-lg font-semibold text-text-primary mb-2">
|
||||
{t("budgetEditTitle")}
|
||||
</h3>
|
||||
<p className="text-sm text-text-secondary mb-4">
|
||||
<p className="text-sm text-text-secondary mb-5">
|
||||
{t("budgetEditDescription")}
|
||||
</p>
|
||||
<div className="text-xs text-amber-400 bg-amber-400/10 border border-amber-400/20 rounded-lg px-3 py-2 mb-5">
|
||||
{t("budgetOrgScopeWarning")}
|
||||
</div>
|
||||
|
||||
<form onSubmit={onSubmit} className="space-y-4">
|
||||
{/* Mode toggle: unlimited vs capped. Two radios are
|
||||
|
||||
@@ -93,6 +93,94 @@ export async function listTeams(): Promise<any[]> {
|
||||
return Array.isArray(data) ? data : data?.data ?? data?.teams ?? [];
|
||||
}
|
||||
|
||||
/**
 * Pull the array of key objects out of a `/key/list` response.
 * Accepts `{ keys: [...] }`, `{ data: [...] }` or a bare array;
 * anything else yields an empty list.
 */
function extractKeyList(payload: unknown): unknown[] {
  if (Array.isArray(payload)) return payload;
  if (typeof payload === "object" && payload !== null) {
    const { keys, data } = payload as { keys?: unknown; data?: unknown };
    if (Array.isArray(keys)) return keys;
    if (Array.isArray(data)) return data;
  }
  return [];
}

/**
 * Coerce a number or numeric string to a finite number; everything
 * else (null, undefined, "", NaN, non-numeric text, objects) → null.
 * Unlike `Number(x) || null`, a legitimate 0 / "0" survives.
 */
function toFiniteNumberOrNull(value: unknown): number | null {
  if (typeof value === "number") {
    return Number.isFinite(value) ? value : null;
  }
  if (typeof value === "string" && value.trim() !== "") {
    const parsed = Number(value);
    return Number.isFinite(parsed) ? parsed : null;
  }
  return null;
}

/**
 * Find a virtual key on a team by its alias and return its current
 * state (token, spend, budget cap, reset cadence). Returns null if
 * no key with a non-empty string token carries that alias.
 *
 * Why we need this
 * ----------------
 * Per-tenant budgets live on the virtual key, not the team. The
 * portal needs to:
 *   1. Display the current key's `max_budget` / `budget_duration` /
 *      `spend` on the tenant detail page.
 *   2. Pass the key's `token` to `/key/update` when the customer
 *      changes the budget.
 *
 * Lookup strategy
 * ---------------
 * The first request asks the server to filter by `key_alias` and to
 * use a page size of 100, so the tenant's key is not lost behind
 * other keys of the same team when the listing is paginated. The
 * alias is still compared client-side, so a server that ignores or
 * loosely matches the filter cannot produce a wrong hit. If that
 * request finds nothing, the plain team listing is tried once as a
 * fallback.
 * NOTE(review): `key_alias` / `size` are taken from LiteLLM's
 * `/key/list` query parameters — confirm against the deployed version.
 *
 * @param teamId   LiteLLM team id that owns the key.
 * @param keyAlias Alias to match exactly (the tenant name).
 * @returns The key's token and budget state, or null when not found.
 *          Errors from `litellmFetch` propagate to the caller.
 */
export async function findKeyByAlias(
  teamId: string,
  keyAlias: string
): Promise<{
  token: string;
  spend: number;
  maxBudget: number | null;
  budgetDuration: string | null;
} | null> {
  const unfiltered = `/key/list?team_id=${encodeURIComponent(teamId)}&return_full_object=true&include_team_keys=true`;
  const filtered = `${unfiltered}&size=100&key_alias=${encodeURIComponent(keyAlias)}`;

  for (const path of [filtered, unfiltered]) {
    const data = await litellmFetch(path);

    for (const entry of extractKeyList(data)) {
      if (typeof entry !== "object" || entry === null) continue;
      const k = entry as Record<string, unknown>;

      const alias = k.key_alias ?? k.keyAlias;
      if (alias !== keyAlias) continue;
      // A match without a usable token cannot be passed to /key/update.
      if (typeof k.token !== "string" || !k.token) continue;

      return {
        token: k.token,
        spend: toFiniteNumberOrNull(k.spend) ?? 0,
        maxBudget: toFiniteNumberOrNull(k.max_budget),
        budgetDuration:
          typeof k.budget_duration === "string" ? k.budget_duration : null,
      };
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Change the budget cap and/or reset cadence of a virtual key by
 * POSTing to `/key/update`.
 *
 * Field semantics:
 *   - `maxBudget: null` removes the cap; `undefined` leaves it untouched
 *     (the field is omitted from the request).
 *   - `budgetDuration: null` means the budget never resets (lifetime
 *     cap); `undefined` leaves it untouched.
 *
 * @param key     Identifies the key — either the raw `sk-...` token or
 *                its hash (LiteLLM accepts both shapes on /key/update).
 *                The portal flow passes the value returned by
 *                `findKeyByAlias`.
 * @param changes Fields to update; only defined fields are sent.
 */
export async function updateKeyBudget(
  key: string,
  changes: {
    maxBudget?: number | null;
    budgetDuration?: string | null;
  }
): Promise<void> {
  const { maxBudget, budgetDuration } = changes;

  // `null` is a meaningful value here, so only `undefined` is skipped.
  const payload: Record<string, unknown> = {
    key,
    ...(maxBudget === undefined ? {} : { max_budget: maxBudget }),
    ...(budgetDuration === undefined
      ? {}
      : { budget_duration: budgetDuration }),
  };

  await litellmFetch("/key/update", {
    method: "POST",
    body: JSON.stringify(payload),
  });
}
|
||||
|
||||
/**
|
||||
* Get LiteLLM health status.
|
||||
*/
|
||||
|
||||
@@ -192,8 +192,7 @@
|
||||
"requests": "Anfragen",
|
||||
"budgetEdit": "Bearbeiten",
|
||||
"budgetEditTitle": "Budget festlegen",
|
||||
"budgetEditDescription": "Begrenzen Sie, wie viel Ihre Assistenten ausgeben können, bevor Anfragen abgelehnt werden.",
|
||||
"budgetOrgScopeWarning": "Dieses Budget gilt für alle Tenants Ihrer Organisation, nicht nur für diesen. Bei mehreren Tenants teilen sich diese das Limit.",
|
||||
"budgetEditDescription": "Begrenzen Sie, wie viel die Assistenten dieses Tenants ausgeben können, bevor Anfragen abgelehnt werden.",
|
||||
"budgetModeUnlimited": "Kein Limit",
|
||||
"budgetModeUnlimitedDescription": "Beliebige Ausgaben, kein Limit.",
|
||||
"budgetModeCapped": "Limit festlegen",
|
||||
|
||||
@@ -192,8 +192,7 @@
|
||||
"requests": "requests",
|
||||
"budgetEdit": "Edit",
|
||||
"budgetEditTitle": "Set spending budget",
|
||||
"budgetEditDescription": "Cap how much your assistants can spend before requests start being declined.",
|
||||
"budgetOrgScopeWarning": "This budget applies to all tenants in your organization, not just this one. If you have multiple tenants, they share the same cap.",
|
||||
"budgetEditDescription": "Cap how much this tenant's assistants can spend before requests start being declined.",
|
||||
"budgetModeUnlimited": "No limit",
|
||||
"budgetModeUnlimitedDescription": "Spend as much as needed; no cap.",
|
||||
"budgetModeCapped": "Set a cap",
|
||||
|
||||
@@ -192,8 +192,7 @@
|
||||
"requests": "requêtes",
|
||||
"budgetEdit": "Modifier",
|
||||
"budgetEditTitle": "Définir un budget",
|
||||
"budgetEditDescription": "Limitez la dépense de vos assistants avant que les requêtes ne soient refusées.",
|
||||
"budgetOrgScopeWarning": "Ce budget s'applique à tous les locataires de votre organisation, pas seulement à celui-ci. Si vous avez plusieurs locataires, ils partagent le même plafond.",
|
||||
"budgetEditDescription": "Limitez la dépense des assistants de ce locataire avant que les requêtes ne soient refusées.",
|
||||
"budgetModeUnlimited": "Aucune limite",
|
||||
"budgetModeUnlimitedDescription": "Dépense libre, sans plafond.",
|
||||
"budgetModeCapped": "Définir un plafond",
|
||||
|
||||
@@ -192,8 +192,7 @@
|
||||
"requests": "richieste",
|
||||
"budgetEdit": "Modifica",
|
||||
"budgetEditTitle": "Imposta budget",
|
||||
"budgetEditDescription": "Limita quanto i tuoi assistenti possono spendere prima che le richieste vengano rifiutate.",
|
||||
"budgetOrgScopeWarning": "Questo budget si applica a tutti i tenant della tua organizzazione, non solo a questo. Se hai più tenant, condividono lo stesso limite.",
|
||||
"budgetEditDescription": "Limita quanto gli assistenti di questo tenant possono spendere prima che le richieste vengano rifiutate.",
|
||||
"budgetModeUnlimited": "Nessun limite",
|
||||
"budgetModeUnlimitedDescription": "Spesa libera, nessun tetto.",
|
||||
"budgetModeCapped": "Imposta un tetto",
|
||||
|
||||
Reference in New Issue
Block a user