182 lines
6.4 KiB
TypeScript
182 lines
6.4 KiB
TypeScript
import { NextRequest, NextResponse } from "next/server";
|
|
import { getSessionUser } from "@/lib/session";
|
|
import { listTenants } from "@/lib/k8s";
|
|
import { listVisibleTenants } from "@/lib/visibility";
|
|
import { getTeamInfo, getTeamSpendLogsV2 } from "@/lib/litellm";
|
|
import { safeError } from "@/lib/errors";
|
|
|
|
/** Number of spend-log rows requested per page from LiteLLM. */
const SPEND_LOG_PAGE_SIZE = 100;

/** Matches `YYYY-M` / `YYYY-MM`; the 1-12 month range is checked separately. */
const MONTH_PATTERN = /^([1-9]\d{3})-(\d{1,2})$/;

/**
 * A raw spend-log row as returned by LiteLLM. The shape differs between
 * LiteLLM versions (see the alias lookup below), so rows are deliberately
 * kept loosely typed and every field is read defensively.
 */
type SpendLogRow = Record<string, any>;

/** Token and spend totals accumulated for one calendar day. */
interface DailyUsage {
  inputTokens: number;
  outputTokens: number;
  spend: number;
}

/** Inclusive `YYYY-MM-DD` bounds of a calendar month. */
interface MonthRange {
  startStr: string;
  endStr: string;
}

/** Zero-pads a number to two digits. */
function pad2(value: number): string {
  return String(value).padStart(2, "0");
}

/**
 * Converts a `YYYY-MM` string into the first and last day of that month.
 *
 * The strings are built from UTC calendar arithmetic rather than by
 * round-tripping a local-time `Date` through `toISOString()`; the latter
 * shifts both bounds by a day whenever the server's timezone is ahead of UTC.
 *
 * @returns the month bounds, or `null` when the input is not a valid month.
 */
function resolveMonthRange(monthParam: string): MonthRange | null {
  const match = MONTH_PATTERN.exec(monthParam);
  if (!match) return null;

  const year = Number(match[1]);
  const month = Number(match[2]);
  if (month < 1 || month > 12) return null;

  // Day 0 of the following month is the last day of the requested month.
  const lastDay = new Date(Date.UTC(year, month, 0)).getUTCDate();
  const prefix = `${year}-${pad2(month)}`;
  return { startStr: `${prefix}-01`, endStr: `${prefix}-${pad2(lastDay)}` };
}

/** Coerces a possibly missing or non-numeric field to a number, defaulting to 0. */
function toNumber(value: unknown): number {
  return Number(value) || 0;
}

/**
 * Reads the key alias from a spend-log row. Newer LiteLLM exposes
 * `key_alias`; older builds nest it inside `metadata`.
 */
function rowKeyAlias(row: SpendLogRow): unknown {
  return (
    row.key_alias ??
    row.metadata?.user_api_key_alias ??
    row.api_key_alias ??
    null
  );
}

/** Returns the `YYYY-MM-DD` prefix of the row's timestamp, or "" if it has none. */
function rowDay(row: SpendLogRow): string {
  const timestamp = row.startTime || row.endTime;
  return typeof timestamp === "string" ? timestamp.slice(0, 10) : "";
}

/**
 * Fetches every page of a team's spend logs for the given date range.
 *
 * We always query at the team level — LiteLLM's /spend/logs/v2 doesn't
 * filter by key_alias reliably across versions, so callers post-filter in
 * code. For pilot scale this is cheap; if a single team ever exceeds ~10k
 * entries/month we can revisit.
 */
async function fetchAllSpendLogs(
  teamId: Parameters<typeof getTeamSpendLogsV2>[0],
  startStr: string,
  endStr: string
): Promise<SpendLogRow[]> {
  const rows: SpendLogRow[] = [];
  for (let page = 1; ; page++) {
    const result = await getTeamSpendLogsV2(
      teamId,
      startStr,
      endStr,
      page,
      SPEND_LOG_PAGE_SIZE
    );
    const pageRows: SpendLogRow[] = result.data || [];
    rows.push(...pageRows);
    // An empty page means there is nothing further to read, even if the
    // reported page count says otherwise.
    if (pageRows.length === 0 || page >= (result.total_pages || 1)) break;
  }
  return rows;
}

/**
 * Aggregates spend-log rows into period totals and a date-sorted daily
 * breakdown, in a single pass. Rows without a timestamp still count towards
 * the totals but are left out of the daily breakdown.
 */
function summarizeUsage(rows: readonly SpendLogRow[]) {
  const byDay = new Map<string, DailyUsage>();
  let inputTokens = 0;
  let outputTokens = 0;
  let totalSpend = 0;

  for (const row of rows) {
    const input = toNumber(row.prompt_tokens);
    const output = toNumber(row.completion_tokens);
    const spend = toNumber(row.spend);

    inputTokens += input;
    outputTokens += output;
    totalSpend += spend;

    const day = rowDay(row);
    if (!day) continue;

    let bucket = byDay.get(day);
    if (!bucket) {
      bucket = { inputTokens: 0, outputTokens: 0, spend: 0 };
      byDay.set(day, bucket);
    }
    bucket.inputTokens += input;
    bucket.outputTokens += output;
    bucket.spend += spend;
  }

  const dailyUsage = Array.from(byDay)
    .sort(([a], [b]) => a.localeCompare(b))
    .map(([date, usage]) => ({ date, ...usage }));

  return {
    inputTokens,
    outputTokens,
    totalSpend,
    requestCount: rows.length,
    dailyUsage,
  };
}

/**
 * GET /api/usage
 *
 * Returns one calendar month of LiteLLM usage together with the team's
 * budget and rate limits.
 *
 * Customers: tenant resolved server-side from the user's visible tenants.
 *   The response is filtered by the tenant's `litellmKeyAlias` so sibling
 *   tenants in the same org don't bleed into the total.
 * Platform admins: may pass ?teamId=... to inspect any team. They may
 *   also pass ?keyAlias=... to scope to a single tenant.
 *
 * Query parameters:
 *   - `month`: `YYYY-MM`; defaults to the current month (UTC).
 *   - `teamId`, `keyAlias`: honoured for platform admins only.
 *
 * Responses:
 *   - 200 with the usage payload.
 *   - 400 when `month` is not a valid `YYYY-MM` value.
 *   - 401 when there is no session user.
 *   - 404 when no visible tenant has a LiteLLM team.
 *   - 500 when a LiteLLM call fails.
 *
 * Slice 2 note
 * ------------
 * LiteLLM teams are now shared across all tenants of an org. The team's
 * `/team/info` budget is the *company* budget; the per-tenant numbers
 * come from filtering spend logs by `key_alias`. If a tenant has no
 * `litellmKeyAlias` in status (transitional state right after upgrade,
 * before the operator has reconciled), we fall back to team-level
 * filtering — the numbers will be slightly inflated for that one
 * reconcile cycle.
 */
export async function GET(req: NextRequest) {
  const user = await getSessionUser();
  if (!user) {
    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
  }

  const params = req.nextUrl.searchParams;
  let teamId: string | null = null;
  let keyAlias: string | null = null;

  // Only platform admins may choose the team / key alias explicitly.
  if (user.isPlatform) {
    teamId = params.get("teamId");
    keyAlias = params.get("keyAlias");
  }

  // For customers (or admins without explicit params): resolve from
  // the user's *visible* tenants. With Slice 6, a `user`-role member
  // can only see usage for tenants they're assigned to — a non-assigned
  // user defaults to "no active tenant" (404).
  //
  // Owner and platform get the full org-scoped list and pick the first
  // tenant, matching the dashboard's "current instance" semantics.
  if (!teamId) {
    const allTenants = await listTenants();
    const visible = await listVisibleTenants(user, allTenants);
    const orgTenant = visible.find((t) => !!t.status?.litellmTeamId);

    if (!orgTenant?.status?.litellmTeamId) {
      return NextResponse.json(
        { error: "No active tenant found for your organization" },
        { status: 404 }
      );
    }
    teamId = orgTenant.status.litellmTeamId;

    // If the operator has populated the per-tenant key alias, filter by it.
    // Falling back to team-level (no alias) will return the org total, which
    // is acceptable transitionally but means siblings' usage shows up here.
    if (orgTenant.status.litellmKeyAlias) {
      keyAlias = orgTenant.status.litellmKeyAlias;
    }
  }

  // Month param: YYYY-MM, defaults to the current month. The default is
  // taken in UTC so it agrees with the UTC date range computed from it.
  const now = new Date();
  const monthParam =
    params.get("month") ||
    `${now.getUTCFullYear()}-${pad2(now.getUTCMonth() + 1)}`;

  const range = resolveMonthRange(monthParam);
  if (!range) {
    return NextResponse.json(
      { error: "Invalid month; expected YYYY-MM" },
      { status: 400 }
    );
  }

  try {
    // Team info and spend logs are independent, so fetch them concurrently.
    const [teamInfo, allRequests] = await Promise.all([
      getTeamInfo(teamId),
      fetchAllSpendLogs(teamId, range.startStr, range.endStr),
    ]);

    // Apply the key_alias post-filter when scoping to a single tenant.
    const scoped = keyAlias
      ? allRequests.filter((row) => rowKeyAlias(row) === keyAlias)
      : allRequests;

    const usage = summarizeUsage(scoped);

    const info = teamInfo?.team_info;
    const maxBudget = info?.max_budget ?? null;
    const teamSpend = info?.spend ?? 0;

    return NextResponse.json({
      teamId,
      keyAlias, // null when not filtering — useful for the client to know it sees company-wide data
      month: monthParam,
      currentPeriod: {
        inputTokens: usage.inputTokens,
        outputTokens: usage.outputTokens,
        totalSpend: usage.totalSpend,
        requestCount: usage.requestCount,
      },
      // Budget is always team-level (= company budget). Spend reported
      // here is the team total, not the per-key total — the customer
      // wants to see "how much of our company budget is left", not just
      // "how much has this one tenant cost".
      budget: {
        maxBudget,
        spend: teamSpend,
        // Nullish check (not truthiness) so a budget of 0 still reports a
        // remaining amount instead of looking like "no budget configured".
        remaining: maxBudget != null ? maxBudget - teamSpend : null,
      },
      rateLimits: {
        rpm: info?.rpm_limit ?? null,
        tpm: info?.tpm_limit ?? null,
      },
      dailyUsage: usage.dailyUsage,
    });
  } catch (e: unknown) {
    console.error("Usage fetch error:", e instanceof Error ? e.message : e);
    return NextResponse.json(
      { error: safeError(e, "Failed to fetch usage") },
      { status: 500 }
    );
  }
}
|