Files
pieced-portal/src/app/api/usage/route.ts
admin d375a099f0
All checks were successful
Build and Push / build (push) Successful in 1m26s
Limit by tenant and org
2026-05-02 23:43:02 +02:00

254 lines
9.4 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import { NextRequest, NextResponse } from "next/server";
import { getSessionUser } from "@/lib/session";
import { listTenants } from "@/lib/k8s";
import { listVisibleTenants } from "@/lib/visibility";
import {
getTeamInfo,
getTeamSpendLogsV2,
findKeyByAlias,
} from "@/lib/litellm";
import { safeError } from "@/lib/errors";
/**
 * GET /api/usage
 *
 * Per-tenant spend/token usage for a given month.
 *
 * Resolution rules (in priority order)
 * ------------------------------------
 * 1. `?tenant=<name>` query param — the canonical path. The route
 *    looks up the PiecedTenant CR by name, runs it through the
 *    viewer's visibility filter, and reads `status.litellmTeamId` +
 *    `status.litellmKeyAlias`. This is what the tenant-detail page
 *    calls for both customers and admins.
 * 2. `?teamId=<id>` (+ optional `?keyAlias=<alias>`) — admin escape
 *    hatch for debugging across orgs (e.g. opening the platform
 *    panel without a specific tenant in mind). Platform-only;
 *    ignored for customer sessions.
 * 3. No params — 400. We deliberately do NOT fall back to "the
 *    first visible tenant". Bug 19: that fallback meant siblings
 *    in the same org showed identical numbers because the API
 *    always picked the same "first" tenant regardless of which
 *    detail page the customer was viewing. Forcing callers to be
 *    explicit makes the bug structurally impossible to reintroduce.
 *
 * Month selection
 * ---------------
 * `?month=YYYY-MM` picks the calendar month to report; when absent
 * (or empty) the server's current local month is used. A value that
 * is not a real month (`foo`, `2026-13`, ...) gets a 400 instead of
 * an exception or a silently different month. The date range passed
 * to LiteLLM is assembled as plain `YYYY-MM-DD` strings, so it does
 * not shift with the server's timezone.
 *
 * Filtering
 * ---------
 * LiteLLM's `/spend/logs/v2` accepts a server-side `key_alias` filter.
 * We pass it through directly — no more "fetch all team pages and
 * post-filter in JS" (which was O(team_total) memory per request and
 * masked the routing bug above by being slow enough that nobody
 * noticed which alias was actually being used).
 *
 * The team-level budget is still surfaced as the *org* budget, since
 * teams are org-scoped post-Slice-2. That's intentional: the customer
 * sees "your company has X budget remaining" alongside "this tenant
 * cost Y this month".
 *
 * Status codes produced here: 401 (no session), 404 (tenant missing or
 * not visible), 409 (tenant not reconciled yet), 400 (no tenant/team
 * given, or bad month), 500 (LiteLLM lookup failed), 200 otherwise.
 */
export async function GET(req: NextRequest) {
  // LiteLLM caps page_size at 100; the page cap bounds the loop below.
  const SPEND_LOG_PAGE_SIZE = 100;
  const MAX_SPEND_LOG_PAGES = 50;

  const user = await getSessionUser();
  if (!user)
    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });

  const params = req.nextUrl.searchParams;
  const tenantName = params.get("tenant");
  let teamId: string | null = null;
  let keyAlias: string | null = null;

  if (tenantName) {
    // Path 1: resolve from tenant name with visibility check.
    //
    // listVisibleTenants enforces the same visibility rules as every
    // other read endpoint:
    //   - platform admins see everything
    //   - owners see all tenants in their org
    //   - users see only the tenants they're assigned to (Slice 6)
    //
    // Filtering through that list rather than reading the CR directly
    // means a malicious caller can't probe arbitrary tenant names to
    // learn what exists in other orgs.
    const allTenants = await listTenants();
    const visible = await listVisibleTenants(user, allTenants);
    const tenant = visible.find((t) => t.metadata.name === tenantName);
    if (!tenant) {
      return NextResponse.json(
        { error: "Tenant not found or not accessible" },
        { status: 404 }
      );
    }
    if (!tenant.status?.litellmTeamId) {
      // Tenant exists but the operator hasn't reconciled it yet.
      // Common right after onboarding; the customer should see a
      // friendly empty state, not a 500.
      return NextResponse.json(
        { error: "Tenant is still provisioning, no usage data yet" },
        { status: 409 }
      );
    }
    teamId = tenant.status.litellmTeamId;
    // litellmKeyAlias is set by the operator's LiteLLM reconcile step
    // alongside litellmTeamId, so if teamId is present this should be
    // too. Defensive fallback to team-level if missing — in that case
    // the customer briefly sees company totals until the next operator
    // reconcile, which is better than 500.
    keyAlias = tenant.status.litellmKeyAlias ?? null;
  } else if (user.isPlatform) {
    // Path 2: admin escape hatch.
    teamId = params.get("teamId");
    keyAlias = params.get("keyAlias");
    if (!teamId) {
      return NextResponse.json(
        {
          error:
            "Either ?tenant=<name> or ?teamId=<id> (admin) must be provided",
        },
        { status: 400 }
      );
    }
  } else {
    // Path 3: no resolution possible. See doc above for why we don't
    // pick a default.
    return NextResponse.json(
      { error: "Tenant must be specified via ?tenant=<name>" },
      { status: 400 }
    );
  }

  // Month param: YYYY-MM, defaults to current month. Validated up
  // front so a malformed value is a 400, not an unhandled exception.
  const range = resolveMonthRange(params.get("month"), new Date());
  if (!range) {
    return NextResponse.json(
      { error: "Invalid ?month, expected YYYY-MM" },
      { status: 400 }
    );
  }
  const { month: monthParam, startStr, endStr } = range;

  try {
    const teamInfo = await getTeamInfo(teamId);

    // Per-tenant budget lives on the virtual key, not the team
    // (Feature 7 fix). When the request is scoped to a specific
    // tenant (keyAlias provided), look up the key so we can return
    // the per-tenant cap. Tolerate failure — older LiteLLM builds
    // or short-lived race conditions during provisioning shouldn't
    // 500 the whole usage page; we degrade to "no key info".
    const keyInfo = keyAlias
      ? await findKeyByAlias(teamId, keyAlias).catch(() => null)
      : null;

    // Page through results — server-side filtered by key_alias when
    // provided. Pagination is still needed because a busy tenant can
    // easily exceed one page in a month. The loop bound is a defensive
    // cap: a pathological response with bogus total_pages shouldn't be
    // able to spin us forever. 50 pages × 100 = 5000 entries per
    // month/tenant is well above any realistic usage at pilot scale.
    const allRequests: any[] = [];
    for (let page = 1; page <= MAX_SPEND_LOG_PAGES; page++) {
      const result = await getTeamSpendLogsV2(
        teamId,
        startStr,
        endStr,
        page,
        SPEND_LOG_PAGE_SIZE,
        keyAlias
      );
      allRequests.push(...(result.data || []));
      if (page >= (result.total_pages || 1)) break;
    }

    // Single pass: month totals plus per-day buckets. Rows without a
    // usable timestamp still count toward the totals but cannot be
    // placed on a day.
    const byDay = new Map<
      string,
      { inputTokens: number; outputTokens: number; spend: number }
    >();
    let totalInput = 0;
    let totalOutput = 0;
    let totalSpend = 0;
    for (const r of allRequests) {
      const inputTokens = r.prompt_tokens || 0;
      const outputTokens = r.completion_tokens || 0;
      const spend = r.spend || 0;
      totalInput += inputTokens;
      totalOutput += outputTokens;
      totalSpend += spend;

      const day = (r.startTime || r.endTime || "").slice(0, 10);
      if (!day) continue;
      let bucket = byDay.get(day);
      if (!bucket) {
        bucket = { inputTokens: 0, outputTokens: 0, spend: 0 };
        byDay.set(day, bucket);
      }
      bucket.inputTokens += inputTokens;
      bucket.outputTokens += outputTokens;
      bucket.spend += spend;
    }
    const dailyUsage = [...byDay.entries()]
      .sort(([a], [b]) => a.localeCompare(b))
      .map(([date, d]) => ({ date, ...d }));

    const team = teamInfo?.team_info;
    const teamMaxBudget = team?.max_budget ?? null;
    const teamSpend = team?.spend ?? 0;

    return NextResponse.json({
      teamId,
      keyAlias, // null when admin queries team-wide (no specific tenant)
      month: monthParam,
      currentPeriod: {
        inputTokens: totalInput,
        outputTokens: totalOutput,
        totalSpend,
        requestCount: allRequests.length,
      },
      // Budget reporting (Feature 7).
      //
      // When the caller scopes to a specific tenant (keyAlias set),
      // we report THAT tenant's per-key budget — that's what the
      // tenant detail page renders, and what the customer expects
      // when they see "Budget" on a tenant's page.
      //
      // When unscoped (admin / org-wide view), or when the key lookup
      // came back empty, we fall back to the team budget — that's the
      // org-wide cap, conceptually different but the only thing
      // meaningful at that scope.
      //
      // The two cases display the same way; the editor button gates
      // on whether we know which tenant we're on (= keyAlias set).
      budget:
        keyAlias && keyInfo
          ? {
              maxBudget: keyInfo.maxBudget,
              spend: keyInfo.spend,
              remaining:
                keyInfo.maxBudget !== null
                  ? keyInfo.maxBudget - keyInfo.spend
                  : null,
              budgetDuration: keyInfo.budgetDuration,
            }
          : {
              maxBudget: teamMaxBudget,
              spend: teamSpend,
              // Null check, not truthiness: a budget of 0 is a real
              // (fully exhausted) cap, not "no budget configured".
              remaining:
                teamMaxBudget !== null ? teamMaxBudget - teamSpend : null,
              budgetDuration: team?.budget_duration ?? null,
            },
      rateLimits: {
        rpm: team?.rpm_limit ?? null,
        tpm: team?.tpm_limit ?? null,
      },
      dailyUsage,
    });
  } catch (e: any) {
    // `e` stays `any` because safeError's parameter type is declared
    // elsewhere; the log line is null-safe in case a non-Error is thrown.
    console.error("Usage fetch error:", e?.message ?? e);
    return NextResponse.json(
      { error: safeError(e, "Failed to fetch usage") },
      { status: 500 }
    );
  }
}

/**
 * Resolve the `?month=` value into the inclusive first/last day of that
 * calendar month as `YYYY-MM-DD` strings.
 *
 * @param raw Value of the `month` query param; null or empty selects the
 *   month of `now` in local time.
 * @param now Clock used for the default month.
 * @returns The month label exactly as it will be echoed to the client
 *   plus the date range, or null when `raw` is not `YYYY-M` / `YYYY-MM`
 *   with a month between 1 and 12.
 */
function resolveMonthRange(
  raw: string | null,
  now: Date
): { month: string; startStr: string; endStr: string } | null {
  const month =
    raw ||
    `${now.getFullYear()}-${String(now.getMonth() + 1).padStart(2, "0")}`;

  const match = /^(\d{4})-(\d{1,2})$/.exec(month);
  if (!match) return null;
  const year = Number(match[1]);
  const monthNumber = Number(match[2]);
  if (monthNumber < 1 || monthNumber > 12) return null;

  // Day 0 of the following month is the last day of this one. Done in
  // UTC and read back in UTC so no timezone offset can move the date.
  const lastDay = new Date(Date.UTC(year, monthNumber, 0)).getUTCDate();

  const prefix = `${String(year).padStart(4, "0")}-${String(
    monthNumber
  ).padStart(2, "0")}`;
  return {
    month,
    startStr: `${prefix}-01`,
    endStr: `${prefix}-${String(lastDay).padStart(2, "0")}`,
  };
}