254 lines
9.4 KiB
TypeScript
254 lines
9.4 KiB
TypeScript
import { NextRequest, NextResponse } from "next/server";
|
||
import { getSessionUser } from "@/lib/session";
|
||
import { listTenants } from "@/lib/k8s";
|
||
import { listVisibleTenants } from "@/lib/visibility";
|
||
import {
|
||
getTeamInfo,
|
||
getTeamSpendLogsV2,
|
||
findKeyByAlias,
|
||
} from "@/lib/litellm";
|
||
import { safeError } from "@/lib/errors";
|
||
|
||
/**
|
||
* GET /api/usage
|
||
*
|
||
* Per-tenant spend/token usage for a given month.
|
||
*
|
||
* Resolution rules (in priority order)
|
||
* ------------------------------------
|
||
* 1. `?tenant=<name>` query param — the canonical path. The route
|
||
* looks up the PiecedTenant CR by name, runs it through the
|
||
* viewer's visibility filter, and reads `status.litellmTeamId` +
|
||
* `status.litellmKeyAlias`. This is what the tenant-detail page
|
||
* calls for both customers and admins.
|
||
* 2. `?teamId=<id>` (+ optional `?keyAlias=<alias>`) — admin escape
|
||
* hatch for debugging across orgs (e.g. opening the platform
|
||
* panel without a specific tenant in mind). Platform-only;
|
||
* ignored for customer sessions.
|
||
* 3. No params — 400. We deliberately do NOT fall back to "the
|
||
* first visible tenant". Bug 19: that fallback meant siblings
|
||
* in the same org showed identical numbers because the API
|
||
* always picked the same "first" tenant regardless of which
|
||
* detail page the customer was viewing. Forcing callers to be
|
||
* explicit makes the bug structurally impossible to reintroduce.
|
||
*
|
||
* Filtering
|
||
* ---------
|
||
* LiteLLM's `/spend/logs/v2` accepts a server-side `key_alias` filter.
|
||
* We pass it through directly — no more "fetch all team pages and
|
||
* post-filter in JS" (which was O(team_total) memory per request and
|
||
* masked the routing bug above by being slow enough that nobody
|
||
* noticed which alias was actually being used).
|
||
*
|
||
* The team-level budget is still surfaced as the *org* budget, since
|
||
* teams are org-scoped post-Slice-2. That's intentional: the customer
|
||
* sees "your company has X budget remaining" alongside "this tenant
|
||
* cost Y this month".
|
||
*/
|
||
export async function GET(req: NextRequest) {
  // Paging bounds for the spend-log fetch. The page size of 100 is the
  // value this route has always requested; MAX_PAGES is a defensive cap so
  // a response with a bogus total_pages cannot keep us looping forever
  // (50 pages x 100 rows = 5000 entries per month per tenant).
  const PAGE_SIZE = 100;
  const MAX_PAGES = 50;

  const user = await getSessionUser();
  if (!user)
    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });

  const params = req.nextUrl.searchParams;
  const tenantName = params.get("tenant");
  let teamId: string | null = null;
  let keyAlias: string | null = null;

  if (tenantName) {
    // Path 1: resolve from tenant name with visibility check.
    //
    // listVisibleTenants enforces the same visibility rules as every
    // other read endpoint:
    //   - platform admins see everything
    //   - owners see all tenants in their org
    //   - users see only the tenants they're assigned to (Slice 6)
    //
    // Filtering through that list rather than reading the CR directly
    // means a malicious caller can't probe arbitrary tenant names to
    // learn what exists in other orgs.
    const allTenants = await listTenants();
    const visible = await listVisibleTenants(user, allTenants);
    const tenant = visible.find((t) => t.metadata.name === tenantName);

    if (!tenant) {
      return NextResponse.json(
        { error: "Tenant not found or not accessible" },
        { status: 404 }
      );
    }
    if (!tenant.status?.litellmTeamId) {
      // Tenant exists but the operator hasn't reconciled it yet.
      // Common right after onboarding; the customer should see a
      // friendly empty state, not a 500.
      return NextResponse.json(
        { error: "Tenant is still provisioning, no usage data yet" },
        { status: 409 }
      );
    }
    teamId = tenant.status.litellmTeamId;
    // litellmKeyAlias is set by the operator's LiteLLM reconcile step
    // alongside litellmTeamId, so if teamId is present this should be
    // too. Defensive fallback to team-level if missing — in that case
    // the customer briefly sees company totals until the next operator
    // reconcile, which is better than 500.
    keyAlias = tenant.status.litellmKeyAlias ?? null;
  } else if (user.isPlatform) {
    // Path 2: admin escape hatch.
    teamId = params.get("teamId");
    keyAlias = params.get("keyAlias");
    if (!teamId) {
      return NextResponse.json(
        {
          error:
            "Either ?tenant=<name> or ?teamId=<id> (admin) must be provided",
        },
        { status: 400 }
      );
    }
  } else {
    // Path 3: no resolution possible. We deliberately do not pick a
    // default tenant; callers must be explicit.
    return NextResponse.json(
      { error: "Tenant must be specified via ?tenant=<name>" },
      { status: 400 }
    );
  }

  // Month param: YYYY-MM, defaults to the current month on the server's
  // clock. `||` (not `??`) is intentional so an empty `?month=` also
  // falls back to the default.
  const now = new Date();
  const monthParam =
    params.get("month") ||
    `${now.getFullYear()}-${String(now.getMonth() + 1).padStart(2, "0")}`;

  // Reject malformed months up front. Previously an unparsable value
  // produced an Invalid Date whose toISOString() threw outside the
  // try/catch below, surfacing as an unhandled 500.
  const range = resolveMonthRange(monthParam);
  if (!range) {
    return NextResponse.json(
      { error: "Invalid month, expected YYYY-MM" },
      { status: 400 }
    );
  }
  const { startStr, endStr } = range;

  try {
    // Per-tenant budget lives on the virtual key, not the team
    // (Feature 7 fix). When the request is scoped to a specific
    // tenant (keyAlias provided), look up the key so we can return
    // the per-tenant cap. Tolerate failure — older LiteLLM builds
    // or short-lived race conditions during provisioning shouldn't
    // 500 the whole usage page; we degrade to "no key info".
    //
    // The team lookup and the key lookup are independent, so they run
    // concurrently. A team lookup failure still rejects and is handled
    // by the catch below, exactly as before.
    const [teamInfo, keyInfo] = await Promise.all([
      getTeamInfo(teamId),
      keyAlias
        ? findKeyByAlias(teamId, keyAlias).catch((err: unknown) => {
            console.warn(
              "Usage key lookup failed, falling back to team budget:",
              err instanceof Error ? err.message : err
            );
            return null;
          })
        : null,
    ]);

    // Page through results — server-side filtered by key_alias when
    // provided. Pagination is still needed because a busy tenant can
    // easily exceed one page in a month.
    //
    // Row shape is not declared in this file, so rows stay loosely typed.
    const allRequests: any[] = [];
    for (let page = 1; page <= MAX_PAGES; page++) {
      const result = await getTeamSpendLogsV2(
        teamId,
        startStr,
        endStr,
        page,
        PAGE_SIZE,
        keyAlias
      );
      allRequests.push(...(result.data || []));

      const totalPages = result.total_pages || 1;
      if (page >= totalPages) break;
      if (page === MAX_PAGES) {
        // Make truncation visible in logs instead of silently
        // under-reporting usage.
        console.warn(
          `Usage fetch hit the ${MAX_PAGES}-page cap (reported total_pages=${totalPages}); results are truncated`
        );
      }
    }

    // Single pass: accumulate period totals and per-day buckets together.
    // Totals include every row; rows without a timestamp are only
    // excluded from the daily breakdown.
    let totalInput = 0;
    let totalOutput = 0;
    let totalSpend = 0;
    const byDay: Record<string, DayTotals> = {};
    for (const r of allRequests) {
      const inputTokens = r.prompt_tokens || 0;
      const outputTokens = r.completion_tokens || 0;
      const spend = r.spend || 0;

      totalInput += inputTokens;
      totalOutput += outputTokens;
      totalSpend += spend;

      // First 10 chars of the timestamp string, i.e. the YYYY-MM-DD part.
      const day = (r.startTime || r.endTime || "").slice(0, 10);
      if (!day) continue;

      let bucket = byDay[day];
      if (!bucket) {
        bucket = { inputTokens: 0, outputTokens: 0, spend: 0 };
        byDay[day] = bucket;
      }
      bucket.inputTokens += inputTokens;
      bucket.outputTokens += outputTokens;
      bucket.spend += spend;
    }

    const dailyUsage = Object.entries(byDay)
      .sort(([a], [b]) => a.localeCompare(b))
      .map(([date, d]) => ({ date, ...d }));

    // Team-level budget figures. A null check (not truthiness) is used
    // so a configured budget of 0 still reports a `remaining` value,
    // matching how the per-key branch below treats its budget.
    const teamMaxBudget = teamInfo?.team_info?.max_budget ?? null;
    const teamSpend = teamInfo?.team_info?.spend ?? 0;

    return NextResponse.json({
      teamId,
      keyAlias, // null when admin queries team-wide (no specific tenant)
      month: monthParam,
      currentPeriod: {
        inputTokens: totalInput,
        outputTokens: totalOutput,
        totalSpend,
        requestCount: allRequests.length,
      },
      // Budget reporting (Feature 7).
      //
      // When the caller scopes to a specific tenant (keyAlias set),
      // we report THAT tenant's per-key budget — that's what the
      // tenant detail page renders, and what the customer expects
      // when they see "Budget" on a tenant's page.
      //
      // When unscoped (admin / org-wide view), or when the key lookup
      // failed, we fall back to the team budget — that's the org-wide
      // cap, conceptually different but the only thing meaningful at
      // that scope.
      //
      // The two cases display the same way; the editor button gates
      // on whether we know which tenant we're on (= keyAlias set).
      budget:
        keyAlias && keyInfo
          ? {
              maxBudget: keyInfo.maxBudget,
              spend: keyInfo.spend,
              remaining:
                keyInfo.maxBudget != null
                  ? keyInfo.maxBudget - keyInfo.spend
                  : null,
              budgetDuration: keyInfo.budgetDuration,
            }
          : {
              maxBudget: teamMaxBudget,
              spend: teamSpend,
              remaining:
                teamMaxBudget !== null ? teamMaxBudget - teamSpend : null,
              budgetDuration: teamInfo?.team_info?.budget_duration ?? null,
            },
      rateLimits: {
        rpm: teamInfo?.team_info?.rpm_limit ?? null,
        tpm: teamInfo?.team_info?.tpm_limit ?? null,
      },
      dailyUsage,
    });
  } catch (e: any) {
    console.error("Usage fetch error:", e instanceof Error ? e.message : e);
    return NextResponse.json(
      { error: safeError(e, "Failed to fetch usage") },
      { status: 500 }
    );
  }
}

/** Per-day accumulator used for the `dailyUsage` breakdown. */
type DayTotals = { inputTokens: number; outputTokens: number; spend: number };

/** Inclusive first/last calendar day of a month, as YYYY-MM-DD strings. */
type MonthRange = { startStr: string; endStr: string };

/**
 * Turns a `YYYY-MM` (or `YYYY-M`) string into the first and last calendar
 * day of that month.
 *
 * The strings are assembled from the parsed numbers rather than by
 * round-tripping a local-time Date through `toISOString()`: that method
 * always emits UTC, so on a server whose timezone is ahead of UTC local
 * midnight on the 1st serializes as the last day of the previous month,
 * shifting the whole range back by one day.
 *
 * @param monthParam Raw month value from the query string.
 * @returns The month's bounds, or `null` when the input is not a valid
 *   year-month (wrong shape, or month outside 1-12).
 */
function resolveMonthRange(monthParam: string): MonthRange | null {
  const match = /^(\d{4})-(\d{1,2})$/.exec(monthParam);
  if (!match) return null;

  const year = Number(match[1]);
  const month = Number(match[2]);
  if (month < 1 || month > 12) return null;

  // Day 0 of the following month is the last day of this month; done in
  // UTC so the result does not depend on the server's timezone.
  const lastDay = new Date(Date.UTC(year, month, 0)).getUTCDate();

  const yyyy = String(year).padStart(4, "0");
  const mm = String(month).padStart(2, "0");
  return {
    startStr: `${yyyy}-${mm}-01`,
    endStr: `${yyyy}-${mm}-${String(lastDay).padStart(2, "0")}`,
  };
}
|