One LiteLLM team per company + virtual keys
All checks were successful
Build and Push / build (push) Successful in 1m24s
All checks were successful
Build and Push / build (push) Successful in 1m24s
This commit is contained in:
@@ -4,6 +4,7 @@ import { listTenants } from "@/lib/k8s";
|
||||
import {
|
||||
getLitellmHealth,
|
||||
getGlobalSpend,
|
||||
getPerKeySpend,
|
||||
getPerTeamSpend,
|
||||
} from "@/lib/litellm";
|
||||
|
||||
@@ -28,6 +29,17 @@ async function checkVllmHealth(): Promise<{
|
||||
/**
|
||||
* GET /api/admin/health
|
||||
* Returns system health overview for the admin panel.
|
||||
*
|
||||
* Slice 2 spend layout
|
||||
* --------------------
|
||||
* - `spend.global` — total across all teams (LiteLLM-reported)
|
||||
* - `spend.perTenant[name]` — per-tenant CHF, derived from the per-key
|
||||
* spend map keyed by `litellmKeyAlias`. Only
|
||||
* populated for tenants whose status carries
|
||||
* an alias (post-Slice-2 reconciled CRs).
|
||||
* - `spend.perOrg[teamId]` — company-level total (= LiteLLM team total).
|
||||
* Useful for the admin overview to see
|
||||
* spend-per-customer at a glance.
|
||||
*/
|
||||
export async function GET() {
|
||||
try {
|
||||
@@ -36,17 +48,17 @@ export async function GET() {
|
||||
return NextResponse.json({ error: "Forbidden" }, { status: 403 });
|
||||
}
|
||||
|
||||
const [tenants, litellm, vllm, globalSpend, perTeamSpend] =
|
||||
const [tenants, litellm, vllm, globalSpend, perKeySpend, perTeamSpend] =
|
||||
await Promise.allSettled([
|
||||
listTenants(),
|
||||
getLitellmHealth(),
|
||||
checkVllmHealth(),
|
||||
getGlobalSpend(),
|
||||
getPerKeySpend(),
|
||||
getPerTeamSpend(),
|
||||
]);
|
||||
|
||||
const allTenants =
|
||||
tenants.status === "fulfilled" ? tenants.value : [];
|
||||
const allTenants = tenants.status === "fulfilled" ? tenants.value : [];
|
||||
|
||||
// Count tenants by phase
|
||||
const phaseCounts: Record<string, number> = {};
|
||||
@@ -57,15 +69,27 @@ export async function GET() {
|
||||
phaseCounts[phase] = (phaseCounts[phase] || 0) + 1;
|
||||
}
|
||||
|
||||
// Build per-tenant spend map (tenantName → spend)
|
||||
const spendMap: Record<string, number> = {};
|
||||
// Build per-tenant spend map (tenantName → spend) from the per-key map.
|
||||
// Tenants without a `litellmKeyAlias` in status are skipped — they
|
||||
// simply won't appear in this map until they've been reconciled by
|
||||
// the Slice-2 operator.
|
||||
const keySpend =
|
||||
perKeySpend.status === "fulfilled" ? perKeySpend.value : new Map();
|
||||
const tenantSpend: Record<string, number> = {};
|
||||
for (const t of allTenants) {
|
||||
const alias = t.status?.litellmKeyAlias;
|
||||
if (alias && keySpend.has(alias)) {
|
||||
tenantSpend[t.metadata.name] = keySpend.get(alias)!;
|
||||
}
|
||||
}
|
||||
|
||||
// Build per-org spend map (teamId → spend). Tenants of the same org
|
||||
// share one teamId, so each team appears once with the company total.
|
||||
const teamSpend =
|
||||
perTeamSpend.status === "fulfilled" ? perTeamSpend.value : new Map();
|
||||
for (const t of allTenants) {
|
||||
const teamId = t.status?.litellmTeamId;
|
||||
if (teamId && teamSpend.has(teamId)) {
|
||||
spendMap[t.metadata.name] = teamSpend.get(teamId)!;
|
||||
}
|
||||
const orgSpend: Record<string, number> = {};
|
||||
for (const [teamId, spend] of teamSpend.entries()) {
|
||||
orgSpend[teamId] = spend;
|
||||
}
|
||||
|
||||
return NextResponse.json({
|
||||
@@ -76,7 +100,8 @@ export async function GET() {
|
||||
spend: {
|
||||
global:
|
||||
globalSpend.status === "fulfilled" ? globalSpend.value : 0,
|
||||
perTenant: spendMap,
|
||||
perTenant: tenantSpend,
|
||||
perOrg: orgSpend,
|
||||
},
|
||||
services: {
|
||||
litellm:
|
||||
|
||||
@@ -7,9 +7,21 @@ import { safeError } from "@/lib/errors";
|
||||
/**
|
||||
* GET /api/usage
|
||||
*
|
||||
* Customers: teamId is resolved server-side from the tenant matching the
|
||||
* user's orgId. No client-supplied teamId accepted.
|
||||
* Platform admins: may pass ?teamId=... to inspect any tenant's usage.
|
||||
* Customers: tenant resolved server-side from the user's orgId. The
|
||||
* response is filtered by the tenant's `litellmKeyAlias` so
|
||||
* sibling tenants in the same org don't bleed into the total.
|
||||
* Platform admins: may pass ?teamId=... to inspect any team. They may
|
||||
* also pass ?keyAlias=... to scope to a single tenant.
|
||||
*
|
||||
* Slice 2 note
|
||||
* ------------
|
||||
* LiteLLM teams are now shared across all tenants of an org. The team's
|
||||
* `/team/info` budget is the *company* budget; the per-tenant numbers
|
||||
* come from filtering spend logs by `key_alias`. If a tenant has no
|
||||
* `litellmKeyAlias` in status (transitional state right after upgrade,
|
||||
* before the operator has reconciled), we fall back to team-level
|
||||
* filtering — the numbers will be slightly inflated for that one
|
||||
* reconcile cycle.
|
||||
*/
|
||||
export async function GET(req: NextRequest) {
|
||||
const user = await getSessionUser();
|
||||
@@ -17,13 +29,14 @@ export async function GET(req: NextRequest) {
|
||||
return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
|
||||
|
||||
let teamId: string | null = null;
|
||||
let keyAlias: string | null = null;
|
||||
|
||||
if (user.isPlatform) {
|
||||
// Admins may pass a specific teamId (and optionally keyAlias) to inspect any team or tenant
|
||||
teamId = req.nextUrl.searchParams.get("teamId") ?? null;
|
||||
keyAlias = req.nextUrl.searchParams.get("keyAlias") ?? null;
|
||||
}
|
||||
|
||||
// For customers (or admins without explicit teamId): resolve from their tenant
|
||||
// For customers (or admins without explicit params): resolve from their tenant.
|
||||
if (!teamId) {
|
||||
const tenants = await listTenants();
|
||||
const orgTenant = tenants.find(
|
||||
@@ -37,6 +50,13 @@ export async function GET(req: NextRequest) {
|
||||
);
|
||||
}
|
||||
teamId = orgTenant.status.litellmTeamId;
|
||||
|
||||
// If the operator has populated the per-tenant key alias, filter by it.
|
||||
// Falling back to team-level (no alias) will return the org total, which
|
||||
// is acceptable transitionally but means siblings' usage shows up here.
|
||||
if (orgTenant.status.litellmKeyAlias) {
|
||||
keyAlias = orgTenant.status.litellmKeyAlias;
|
||||
}
|
||||
}
|
||||
|
||||
// Month param: YYYY-MM, defaults to current month
|
||||
@@ -55,7 +75,11 @@ export async function GET(req: NextRequest) {
|
||||
try {
|
||||
const teamInfo = await getTeamInfo(teamId);
|
||||
|
||||
// Fetch all pages
|
||||
// Fetch all pages from the team. We always query at the team level —
|
||||
// LiteLLM's /spend/logs/v2 doesn't filter by key_alias reliably across
|
||||
// versions, so we paginate and post-filter in code. For pilot scale
|
||||
// this is cheap; if a single team ever exceeds ~10k entries/month we
|
||||
// can revisit.
|
||||
const allRequests: any[] = [];
|
||||
let page = 1;
|
||||
while (true) {
|
||||
@@ -71,12 +95,26 @@ export async function GET(req: NextRequest) {
|
||||
page++;
|
||||
}
|
||||
|
||||
// Apply key_alias post-filter when scoping to a single tenant. Match
|
||||
// `key_alias` (newer LiteLLM), `metadata.user_api_key_alias` (older
|
||||
// builds nest it inside metadata), and top-level `api_key_alias`.
|
||||
const scoped = keyAlias
|
||||
? allRequests.filter((r) => {
|
||||
const alias =
|
||||
r.key_alias ??
|
||||
r.metadata?.user_api_key_alias ??
|
||||
r.api_key_alias ??
|
||||
null;
|
||||
return alias === keyAlias;
|
||||
})
|
||||
: allRequests;
|
||||
|
||||
// Aggregate by day
|
||||
const byDay: Record<
|
||||
string,
|
||||
{ inputTokens: number; outputTokens: number; spend: number }
|
||||
> = {};
|
||||
for (const r of allRequests) {
|
||||
for (const r of scoped) {
|
||||
const day = (r.startTime || r.endTime || "").slice(0, 10);
|
||||
if (!day) continue;
|
||||
if (!byDay[day])
|
||||
@@ -90,25 +128,30 @@ export async function GET(req: NextRequest) {
|
||||
.sort(([a], [b]) => a.localeCompare(b))
|
||||
.map(([date, d]) => ({ date, ...d }));
|
||||
|
||||
const totalInput = allRequests.reduce(
|
||||
const totalInput = scoped.reduce(
|
||||
(s, r) => s + (r.prompt_tokens || 0),
|
||||
0
|
||||
);
|
||||
const totalOutput = allRequests.reduce(
|
||||
const totalOutput = scoped.reduce(
|
||||
(s, r) => s + (r.completion_tokens || 0),
|
||||
0
|
||||
);
|
||||
const totalSpend = allRequests.reduce((s, r) => s + (r.spend || 0), 0);
|
||||
const totalSpend = scoped.reduce((s, r) => s + (r.spend || 0), 0);
|
||||
|
||||
return NextResponse.json({
|
||||
teamId,
|
||||
keyAlias, // null when not filtering — useful for the client to know it sees company-wide data
|
||||
month: monthParam,
|
||||
currentPeriod: {
|
||||
inputTokens: totalInput,
|
||||
outputTokens: totalOutput,
|
||||
totalSpend,
|
||||
requestCount: allRequests.length,
|
||||
requestCount: scoped.length,
|
||||
},
|
||||
// Budget is always team-level (= company budget). Spend reported
|
||||
// here is the team total, not the per-key total — the customer
|
||||
// wants to see "how much of our company budget is left", not just
|
||||
// "how much has this one tenant cost".
|
||||
budget: {
|
||||
maxBudget: teamInfo?.team_info?.max_budget ?? null,
|
||||
spend: teamInfo?.team_info?.spend ?? 0,
|
||||
|
||||
Reference in New Issue
Block a user