Group F - Fix spending per tenant
All checks were successful
Build and Push / build (push) Successful in 1m22s

This commit is contained in:
2026-05-01 13:34:56 +02:00
parent 2cf5b56441
commit f308c84325
4 changed files with 169 additions and 92 deletions

View File

@@ -8,64 +8,109 @@ import { safeError } from "@/lib/errors";
/**
* GET /api/usage
*
* Customers: tenant resolved server-side from the user's orgId. The
* response is filtered by the tenant's `litellmKeyAlias` so
* sibling tenants in the same org don't bleed into the total.
* Platform admins: may pass ?teamId=... to inspect any team. They may
* also pass ?keyAlias=... to scope to a single tenant.
* Per-tenant spend/token usage for a given month.
*
* Slice 2 note
* ------------
* LiteLLM teams are now shared across all tenants of an org. The team's
* `/team/info` budget is the *company* budget; the per-tenant numbers
* come from filtering spend logs by `key_alias`. If a tenant has no
* `litellmKeyAlias` in status (transitional state right after upgrade,
* before the operator has reconciled), we fall back to team-level
* filtering — the numbers will be slightly inflated for that one
* reconcile cycle.
* Resolution rules (in priority order)
* ------------------------------------
* 1. `?tenant=<name>` query param — the canonical path. The route
* looks up the PiecedTenant CR by name, runs it through the
* viewer's visibility filter, and reads `status.litellmTeamId` +
* `status.litellmKeyAlias`. This is the path the tenant-detail page
* uses for both customers and admins.
* 2. `?teamId=<id>` (+ optional `?keyAlias=<alias>`) — admin escape
* hatch for debugging across orgs (e.g. opening the platform
* panel without a specific tenant in mind). Platform-only;
* ignored for customer sessions.
* 3. No params — 400. We deliberately do NOT fall back to "the
* first visible tenant". Bug 19: that fallback meant siblings
* in the same org showed identical numbers because the API
* always picked the same "first" tenant regardless of which
* detail page the customer was viewing. Forcing callers to be
* explicit makes the bug structurally impossible to reintroduce.
*
* Filtering
* ---------
* LiteLLM's `/spend/logs/v2` accepts a server-side `key_alias` filter.
* We pass it through directly — no more "fetch all team pages and
* post-filter in JS" (which was O(team_total) memory per request and
* masked the routing bug above by being slow enough that nobody
* noticed which alias was actually being used).
*
* The team-level budget is still surfaced as the *org* budget, since
* teams are org-scoped post-Slice-2. That's intentional: the customer
* sees "your company has X budget remaining" alongside "this tenant
* cost Y this month".
*/
export async function GET(req: NextRequest) {
const user = await getSessionUser();
if (!user)
return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
const tenantName = req.nextUrl.searchParams.get("tenant");
let teamId: string | null = null;
let keyAlias: string | null = null;
if (user.isPlatform) {
teamId = req.nextUrl.searchParams.get("teamId") ?? null;
keyAlias = req.nextUrl.searchParams.get("keyAlias") ?? null;
}
// For customers (or admins without explicit params): resolve from
// the user's *visible* tenants. With Slice 6, a `user`-role member
// can only see usage for tenants they're assigned to — a non-assigned
// user defaults to "no active tenant" (404).
//
// Owner and platform get the full org-scoped list and pick the first
// tenant, matching the dashboard's "current instance" semantics.
if (!teamId) {
if (tenantName) {
// Path 1: resolve from tenant name with visibility check.
//
// listVisibleTenants enforces the same visibility rules as every
// other read endpoint:
// - platform admins see everything
// - owners see all tenants in their org
// - users see only the tenants they're assigned to (Slice 6)
//
// Filtering through that list rather than reading the CR directly
// means a malicious caller can't probe arbitrary tenant names to
// learn what exists in other orgs.
const allTenants = await listTenants();
const visible = await listVisibleTenants(user, allTenants);
const orgTenant = visible.find((t) => !!t.status?.litellmTeamId);
const tenant = visible.find((t) => t.metadata.name === tenantName);
if (!orgTenant?.status?.litellmTeamId) {
if (!tenant) {
return NextResponse.json(
{ error: "No active tenant found for your organization" },
{ error: "Tenant not found or not accessible" },
{ status: 404 }
);
}
teamId = orgTenant.status.litellmTeamId;
// If the operator has populated the per-tenant key alias, filter by it.
// Falling back to team-level (no alias) will return the org total, which
// is acceptable transitionally but means siblings' usage shows up here.
if (orgTenant.status.litellmKeyAlias) {
keyAlias = orgTenant.status.litellmKeyAlias;
if (!tenant.status?.litellmTeamId) {
// Tenant exists but the operator hasn't reconciled it yet.
// Common right after onboarding; the customer should see a
// friendly empty state, not a 500.
return NextResponse.json(
{ error: "Tenant is still provisioning, no usage data yet" },
{ status: 409 }
);
}
teamId = tenant.status.litellmTeamId;
// litellmKeyAlias is set by the operator's LiteLLM reconcile step
// alongside litellmTeamId, so if teamId is present this should be
// too. Defensive fallback to team-level if missing — in that case
// the customer briefly sees company totals until the next operator
// reconcile, which is better than a 500.
keyAlias = tenant.status.litellmKeyAlias ?? null;
} else if (user.isPlatform) {
// Path 2: admin escape hatch.
teamId = req.nextUrl.searchParams.get("teamId");
keyAlias = req.nextUrl.searchParams.get("keyAlias");
if (!teamId) {
return NextResponse.json(
{
error:
"Either ?tenant=<name> or ?teamId=<id> (admin) must be provided",
},
{ status: 400 }
);
}
} else {
// Path 3: no resolution possible. See doc above for why we don't
// pick a default.
return NextResponse.json(
{ error: "Tenant must be specified via ?tenant=<name>" },
{ status: 400 }
);
}
// Month param: YYYY-MM, defaults to current month
// Month param: YYYY-MM, defaults to current month.
const now = new Date();
const monthParam =
req.nextUrl.searchParams.get("month") ||
@@ -81,11 +126,11 @@ export async function GET(req: NextRequest) {
try {
const teamInfo = await getTeamInfo(teamId);
// Fetch all pages from the team. We always query at the team level —
// LiteLLM's /spend/logs/v2 doesn't filter by key_alias reliably across
// versions, so we paginate and post-filter in code. For pilot scale
// this is cheap; if a single team ever exceeds ~10k entries/month we
// can revisit.
// Page through results — server-side filtered by key_alias when
// provided. Pagination is still needed because LiteLLM caps
// page_size at 100, and a busy tenant can easily exceed that in
// a month. With server-side filtering this stays cheap regardless
// of how busy sibling tenants in the same team are.
const allRequests: any[] = [];
let page = 1;
while (true) {
@@ -94,33 +139,25 @@ export async function GET(req: NextRequest) {
startStr,
endStr,
page,
100
100,
keyAlias
);
allRequests.push(...(result.data || []));
if (page >= (result.total_pages || 1)) break;
page++;
// Defensive cap. A pathological response with bogus total_pages
// shouldn't be able to spin us forever. 50 pages × 100 = 5000
// entries/month/tenant is well above any realistic usage at
// pilot scale.
if (page > 50) break;
}
// Apply key_alias post-filter when scoping to a single tenant. Match
// both `key_alias` (newer LiteLLM) and `metadata.user_api_key_alias`
// (older builds nest it inside metadata).
const scoped = keyAlias
? allRequests.filter((r) => {
const alias =
r.key_alias ??
r.metadata?.user_api_key_alias ??
r.api_key_alias ??
null;
return alias === keyAlias;
})
: allRequests;
// Aggregate by day
// Aggregate by day.
const byDay: Record<
string,
{ inputTokens: number; outputTokens: number; spend: number }
> = {};
for (const r of scoped) {
for (const r of allRequests) {
const day = (r.startTime || r.endTime || "").slice(0, 10);
if (!day) continue;
if (!byDay[day])
@@ -134,30 +171,30 @@ export async function GET(req: NextRequest) {
.sort(([a], [b]) => a.localeCompare(b))
.map(([date, d]) => ({ date, ...d }));
const totalInput = scoped.reduce(
const totalInput = allRequests.reduce(
(s, r) => s + (r.prompt_tokens || 0),
0
);
const totalOutput = scoped.reduce(
const totalOutput = allRequests.reduce(
(s, r) => s + (r.completion_tokens || 0),
0
);
const totalSpend = scoped.reduce((s, r) => s + (r.spend || 0), 0);
const totalSpend = allRequests.reduce((s, r) => s + (r.spend || 0), 0);
return NextResponse.json({
teamId,
keyAlias, // null when not filtering — useful for the client to know it sees company-wide data
keyAlias, // null when admin queries team-wide (no specific tenant)
month: monthParam,
currentPeriod: {
inputTokens: totalInput,
outputTokens: totalOutput,
totalSpend,
requestCount: scoped.length,
requestCount: allRequests.length,
},
// Budget is always team-level (= company budget). Spend reported
// here is the team total, not the per-key total — the customer
// wants to see "how much of our company budget is left", not just
// "how much has this one tenant cost".
// wants to see "how much of our company budget is left", not
// just "how much has this one tenant cost".
budget: {
maxBudget: teamInfo?.team_info?.max_budget ?? null,
spend: teamInfo?.team_info?.spend ?? 0,