Limit by tenant and org
All checks were successful
Build and Push / build (push) Successful in 1m26s

This commit is contained in:
2026-05-02 23:43:02 +02:00
parent 666dd64580
commit d375a099f0
8 changed files with 266 additions and 24 deletions

View File

@@ -0,0 +1,126 @@
import { NextRequest, NextResponse } from "next/server";
import { z } from "zod";
import { getSessionUser, canMutate } from "@/lib/session";
import { getTenant } from "@/lib/k8s";
import { canUserSeeTenant } from "@/lib/visibility";
import { findKeyByAlias, updateKeyBudget } from "@/lib/litellm";
import { safeError } from "@/lib/errors";
/**
* Update the per-tenant budget — operates on the LiteLLM virtual
* key, NOT on the team.
*
* Why per-key
* -----------
* Each tenant in an org has its own virtual key
* (`key_alias = tenant.metadata.name`); the team that owns those
* keys is org-scoped and shared across all the org's tenants. A
* budget on the team would cap the whole org; a budget on the key
* caps just this one tenant. Customers landing on the tenant detail
* page reasonably expect "edit budget" to mean "the budget of THIS
* tenant" — so we put it on the key.
*
* The team-level (org-wide) budget is a separate control that lives
* in /settings (not yet implemented) — the two coexist: LiteLLM
* applies whichever cap is hit first.
*
* Schema:
* - maxBudget: number > 0 (set a cap), or null (remove the cap).
* - budgetDuration: one of "30d", "1mo", "1y", or null (lifetime).
*
* Authorization: owners and platform admins.
*/
/**
 * Upper bound accepted for `maxBudget`. Purely a typo guard so that a
 * stray extra digit cannot set an absurdly high cap.
 */
const MAX_BUDGET_CEILING = 1_000_000;

/**
 * Reset cadences accepted for `budgetDuration`.
 *
 * NOTE(review): confirm that every value here (in particular "1y") is
 * a duration string the LiteLLM side accepts — not verifiable from
 * this file.
 */
const BUDGET_DURATIONS = ["30d", "1mo", "1y"] as const;

/**
 * Request-body schema for the budget PATCH.
 *
 * Both fields are required but nullable:
 * - `maxBudget`: strictly positive number up to MAX_BUDGET_CEILING, or
 *   `null` to remove the cap. Zero is excluded because LiteLLM rejects
 *   it, and a zero cap would lock the key out instantly.
 * - `budgetDuration`: one of BUDGET_DURATIONS, or `null` (lifetime).
 */
const patchSchema = z.object({
  maxBudget: z.number().positive().max(MAX_BUDGET_CEILING).nullable(),
  budgetDuration: z.enum(BUDGET_DURATIONS).nullable(),
});
/**
 * PATCH handler: set or remove the budget on a tenant's LiteLLM
 * virtual key.
 *
 * Responses:
 * - 401 when there is no session user.
 * - 403 when the user is not allowed to mutate.
 * - 404 when the tenant does not exist or is not visible to the user
 *   (deliberately the same response, so existence is not leaked).
 * - 409 when the tenant has no LiteLLM team id or no virtual key yet.
 * - 400 when the JSON body does not match `patchSchema`.
 * - 500 when the key lookup or the budget update throws.
 * - 200 with `{ message, maxBudget, budgetDuration }` on success.
 *
 * @param req    Incoming request; its JSON body is validated against
 *               `patchSchema`.
 * @param params Route params; `name` is the tenant name, which is also
 *               used as the key alias for the lookup.
 */
export async function PATCH(
  req: NextRequest,
  { params }: { params: Promise<{ name: string }> }
) {
  // Small local shorthand for the `{ error }` + status responses below.
  const fail = (error: string, status: number) =>
    NextResponse.json({ error }, { status });

  const user = await getSessionUser();
  if (!user) return fail("Unauthorized", 401);
  if (!canMutate(user)) return fail("Forbidden", 403);

  const { name } = await params;
  const tenant = await getTenant(name);
  // A missing tenant and a non-visible tenant get the identical 404 so
  // that callers cannot probe for the existence of tenants they may
  // not see.
  if (!tenant || !(await canUserSeeTenant(user, tenant))) {
    return fail("Not found", 404);
  }

  const teamId = tenant.status?.litellmTeamId;
  if (!teamId) {
    return fail(
      "Tenant has no LiteLLM team yet. Please wait until provisioning completes.",
      409
    );
  }

  // An unparseable body becomes `null`, which then fails schema
  // validation and yields the same 400 as any other invalid input.
  const payload = await req.json().catch(() => null);
  const parsed = patchSchema.safeParse(payload);
  if (!parsed.success) {
    return NextResponse.json(
      { error: "Invalid input", details: parsed.error.flatten() },
      { status: 400 }
    );
  }

  // Removing the cap also clears the reset cadence: a duration on an
  // unlimited budget is meaningless and would confuse LiteLLM's
  // bookkeeping.
  const { maxBudget } = parsed.data;
  const budgetDuration =
    maxBudget === null ? null : parsed.data.budgetDuration;

  // The virtual key is found by alias (= tenant name); the token on
  // the result is what /key/update wants in its `key` field.
  let keyInfo: Awaited<ReturnType<typeof findKeyByAlias>>;
  try {
    keyInfo = await findKeyByAlias(teamId, name);
  } catch (err: any) {
    console.error("Failed to look up tenant key:", err);
    return fail(safeError(err, "Failed to look up tenant key"), 500);
  }
  if (!keyInfo) {
    return fail(
      "Tenant has no virtual key yet. Please wait until provisioning completes.",
      409
    );
  }

  try {
    await updateKeyBudget(keyInfo.token, { maxBudget, budgetDuration });
  } catch (err: any) {
    console.error("Failed to update key budget:", err);
    return fail(safeError(err, "Failed to update budget"), 500);
  }

  const message = maxBudget === null ? "Budget removed." : "Budget updated.";
  return NextResponse.json({ message, maxBudget, budgetDuration });
}

View File

@@ -2,7 +2,11 @@ import { NextRequest, NextResponse } from "next/server";
import { getSessionUser } from "@/lib/session";
import { listTenants } from "@/lib/k8s";
import { listVisibleTenants } from "@/lib/visibility";
import { getTeamInfo, getTeamSpendLogsV2 } from "@/lib/litellm";
import {
getTeamInfo,
getTeamSpendLogsV2,
findKeyByAlias,
} from "@/lib/litellm";
import { safeError } from "@/lib/errors";
/**
@@ -126,6 +130,16 @@ export async function GET(req: NextRequest) {
try {
const teamInfo = await getTeamInfo(teamId);
// Per-tenant budget lives on the virtual key, not the team
// (Feature 7 fix). When the request is scoped to a specific
// tenant (keyAlias provided), look up the key so we can return
// the per-tenant cap. Tolerate failure — older LiteLLM builds
// or short-lived race conditions during provisioning shouldn't
// 500 the whole usage page; we degrade to "no key info".
const keyInfo = keyAlias
? await findKeyByAlias(teamId, keyAlias).catch(() => null)
: null;
// Page through results — server-side filtered by key_alias when
// provided. Pagination still needed because LiteLLM caps
// page_size at 100, and a busy tenant can easily exceed that in
@@ -191,17 +205,38 @@ export async function GET(req: NextRequest) {
totalSpend,
requestCount: allRequests.length,
},
// Budget is always team-level (= company budget). Spend reported
// here is the team total, not the per-key total — the customer
// wants to see "how much of our company budget is left", not
// just "how much has this one tenant cost".
budget: {
maxBudget: teamInfo?.team_info?.max_budget ?? null,
spend: teamInfo?.team_info?.spend ?? 0,
remaining: teamInfo?.team_info?.max_budget
? teamInfo.team_info.max_budget - (teamInfo.team_info.spend ?? 0)
: null,
},
// Budget reporting (Feature 7).
//
// When the caller scopes to a specific tenant (keyAlias set),
// we report THAT tenant's per-key budget — that's what the
// tenant detail page renders, and what the customer expects
// when they see "Budget" on a tenant's page.
//
// When unscoped (admin / org-wide view), we fall back to the
// team budget — that's the org-wide cap, conceptually different
// but the only thing meaningful at that scope.
//
// The two cases display the same way; the editor button gates
// on whether we know which tenant we're on (= keyAlias set).
budget: keyAlias && keyInfo
? {
maxBudget: keyInfo.maxBudget,
spend: keyInfo.spend,
remaining:
keyInfo.maxBudget !== null
? keyInfo.maxBudget - keyInfo.spend
: null,
budgetDuration: keyInfo.budgetDuration,
}
: {
maxBudget: teamInfo?.team_info?.max_budget ?? null,
spend: teamInfo?.team_info?.spend ?? 0,
remaining: teamInfo?.team_info?.max_budget
? teamInfo.team_info.max_budget -
(teamInfo.team_info.spend ?? 0)
: null,
budgetDuration: teamInfo?.team_info?.budget_duration ?? null,
},
rateLimits: {
rpm: teamInfo?.team_info?.rpm_limit ?? null,
tpm: teamInfo?.team_info?.tpm_limit ?? null,