Report budget limits per tenant (virtual key) and per org (team)
All checks were successful
Build and Push / build (push) Successful in 1m26s
All checks were successful
Build and Push / build (push) Successful in 1m26s
This commit is contained in:
@@ -2,7 +2,11 @@ import { NextRequest, NextResponse } from "next/server";
|
||||
import { getSessionUser } from "@/lib/session";
|
||||
import { listTenants } from "@/lib/k8s";
|
||||
import { listVisibleTenants } from "@/lib/visibility";
|
||||
import { getTeamInfo, getTeamSpendLogsV2 } from "@/lib/litellm";
|
||||
import {
|
||||
getTeamInfo,
|
||||
getTeamSpendLogsV2,
|
||||
findKeyByAlias,
|
||||
} from "@/lib/litellm";
|
||||
import { safeError } from "@/lib/errors";
|
||||
|
||||
/**
|
||||
@@ -126,6 +130,16 @@ export async function GET(req: NextRequest) {
|
||||
try {
|
||||
const teamInfo = await getTeamInfo(teamId);
|
||||
|
||||
// Per-tenant budget lives on the virtual key, not the team
|
||||
// (Feature 7 fix). When the request is scoped to a specific
|
||||
// tenant (keyAlias provided), look up the key so we can return
|
||||
// the per-tenant cap. Tolerate failure — older LiteLLM builds
|
||||
// or short-lived race conditions during provisioning shouldn't
|
||||
// 500 the whole usage page; we degrade to "no key info".
|
||||
const keyInfo = keyAlias
|
||||
? await findKeyByAlias(teamId, keyAlias).catch(() => null)
|
||||
: null;
|
||||
|
||||
// Page through results — server-side filtered by key_alias when
|
||||
// provided. Pagination still needed because LiteLLM caps
|
||||
// page_size at 100, and a busy tenant can easily exceed that in
|
||||
@@ -191,17 +205,38 @@ export async function GET(req: NextRequest) {
|
||||
totalSpend,
|
||||
requestCount: allRequests.length,
|
||||
},
|
||||
// Budget is always team-level (= company budget). Spend reported
|
||||
// here is the team total, not the per-key total — the customer
|
||||
// wants to see "how much of our company budget is left", not
|
||||
// just "how much has this one tenant cost".
|
||||
budget: {
|
||||
maxBudget: teamInfo?.team_info?.max_budget ?? null,
|
||||
spend: teamInfo?.team_info?.spend ?? 0,
|
||||
remaining: teamInfo?.team_info?.max_budget
|
||||
? teamInfo.team_info.max_budget - (teamInfo.team_info.spend ?? 0)
|
||||
: null,
|
||||
},
|
||||
// Budget reporting (Feature 7).
|
||||
//
|
||||
// When the caller scopes to a specific tenant (keyAlias set),
|
||||
// we report THAT tenant's per-key budget — that's what the
|
||||
// tenant detail page renders, and what the customer expects
|
||||
// when they see "Budget" on a tenant's page.
|
||||
//
|
||||
// When unscoped (admin / org-wide view), we fall back to the
|
||||
// team budget — that's the org-wide cap, conceptually different
|
||||
// but the only thing meaningful at that scope.
|
||||
//
|
||||
// The two cases display the same way; the editor button gates
|
||||
// on whether we know which tenant we're on (= keyAlias set).
|
||||
budget: keyAlias && keyInfo
|
||||
? {
|
||||
maxBudget: keyInfo.maxBudget,
|
||||
spend: keyInfo.spend,
|
||||
remaining:
|
||||
keyInfo.maxBudget !== null
|
||||
? keyInfo.maxBudget - keyInfo.spend
|
||||
: null,
|
||||
budgetDuration: keyInfo.budgetDuration,
|
||||
}
|
||||
: {
|
||||
maxBudget: teamInfo?.team_info?.max_budget ?? null,
|
||||
spend: teamInfo?.team_info?.spend ?? 0,
|
||||
remaining: teamInfo?.team_info?.max_budget
|
||||
? teamInfo.team_info.max_budget -
|
||||
(teamInfo.team_info.spend ?? 0)
|
||||
: null,
|
||||
budgetDuration: teamInfo?.team_info?.budget_duration ?? null,
|
||||
},
|
||||
rateLimits: {
|
||||
rpm: teamInfo?.team_info?.rpm_limit ?? null,
|
||||
tpm: teamInfo?.team_info?.tpm_limit ?? null,
|
||||
|
||||
Reference in New Issue
Block a user