One LiteLLM team per company + virtual keys

2026-04-26 21:21:02 +02:00
parent 1f48712e42
commit 7b22bc4087
7 changed files with 247 additions and 29 deletions
--- a/src/app/api/admin/health/route.ts
+++ b/src/app/api/admin/health/route.ts
@@ -4,6 +4,7 @@ import { listTenants } from "@/lib/k8s";
 import {
  getLitellmHealth,
  getGlobalSpend,
+  getPerKeySpend,
  getPerTeamSpend,
 } from "@/lib/litellm";

@@ -28,6 +29,17 @@ async function checkVllmHealth(): Promise<{
 /**
 * GET /api/admin/health
 * Returns system health overview for the admin panel.
+ *
+ * Slice 2 spend layout
+ * --------------------
+ * - `spend.global`           — total across all teams (LiteLLM-reported)
+ * - `spend.perTenant[name]`  — per-tenant CHF, derived from the per-key
+ *                              spend map keyed by `litellmKeyAlias`. Only
+ *                              populated for tenants whose status carries
+ *                              an alias (post-Slice-2 reconciled CRs).
+ * - `spend.perOrg[teamId]`   — company-level total (= LiteLLM team total).
+ *                              Useful for the admin overview to see
+ *                              spend-per-customer at a glance.
 */
 export async function GET() {
  try {
@@ -36,17 +48,17 @@ export async function GET() {
    return NextResponse.json({ error: "Forbidden" }, { status: 403 });
  }

-  const [tenants, litellm, vllm, globalSpend, perTeamSpend] =
+  const [tenants, litellm, vllm, globalSpend, perKeySpend, perTeamSpend] =
    await Promise.allSettled([
      listTenants(),
      getLitellmHealth(),
      checkVllmHealth(),
      getGlobalSpend(),
+      getPerKeySpend(),
      getPerTeamSpend(),
    ]);

-  const allTenants =
-    tenants.status === "fulfilled" ? tenants.value : [];
+  const allTenants = tenants.status === "fulfilled" ? tenants.value : [];

  // Count tenants by phase
  const phaseCounts: Record<string, number> = {};
@@ -57,15 +69,27 @@ export async function GET() {
    phaseCounts[phase] = (phaseCounts[phase] || 0) + 1;
  }

-  // Build per-tenant spend map (tenantName → spend)
-  const spendMap: Record<string, number> = {};
+  // Build per-tenant spend map (tenantName → spend) from the per-key map.
+  // Tenants without a `litellmKeyAlias` in status are skipped — they
+  // simply won't appear in this map until they've been reconciled by
+  // the Slice-2 operator.
+  const keySpend =
+    perKeySpend.status === "fulfilled" ? perKeySpend.value : new Map();
+  const tenantSpend: Record<string, number> = {};
+  for (const t of allTenants) {
+    const alias = t.status?.litellmKeyAlias;
+    if (alias && keySpend.has(alias)) {
+      tenantSpend[t.metadata.name] = keySpend.get(alias)!;
+    }
+  }
+
+  // Build per-org spend map (teamId → spend). Tenants of the same org
+  // share a teamId, so each org appears once, holding its team total.
  const teamSpend =
    perTeamSpend.status === "fulfilled" ? perTeamSpend.value : new Map();
-  for (const t of allTenants) {
-    const teamId = t.status?.litellmTeamId;
-    if (teamId && teamSpend.has(teamId)) {
-      spendMap[t.metadata.name] = teamSpend.get(teamId)!;
-    }
+  const orgSpend: Record<string, number> = {};
+  for (const [teamId, spend] of teamSpend.entries()) {
+    orgSpend[teamId] = spend;
  }

  return NextResponse.json({
@@ -76,7 +100,8 @@ export async function GET() {
    spend: {
      global:
        globalSpend.status === "fulfilled" ? globalSpend.value : 0,
-      perTenant: spendMap,
+      perTenant: tenantSpend,
+      perOrg: orgSpend,
    },
    services: {
      litellm:
--- a/src/app/api/usage/route.ts
+++ b/src/app/api/usage/route.ts
@@ -7,9 +7,21 @@ import { safeError } from "@/lib/errors";
 /**
 * GET /api/usage
 *
- * Customers: teamId is resolved server-side from the tenant matching the
- *            user's orgId. No client-supplied teamId accepted.
- * Platform admins: may pass ?teamId=... to inspect any tenant's usage.
+ * Customers: tenant resolved server-side from the user's orgId. The
+ *            response is filtered by the tenant's `litellmKeyAlias` so
+ *            sibling tenants in the same org don't bleed into the total.
+ * Platform admins: may pass ?teamId=... to inspect any team. Alongside it,
+ *                  ?keyAlias=... scopes the result to a single tenant.
+ *
+ * Slice 2 note
+ * ------------
+ * LiteLLM teams are now shared across all tenants of an org. The team's
+ * `/team/info` budget is the *company* budget; the per-tenant numbers
+ * come from filtering spend logs by `key_alias`. If a tenant has no
+ * `litellmKeyAlias` in status (transitional state right after upgrade,
+ * before the operator has reconciled), we fall back to team-level
+ * filtering — the numbers will be slightly inflated for that one
+ * reconcile cycle.
 */
 export async function GET(req: NextRequest) {
  const user = await getSessionUser();
@@ -17,13 +29,14 @@ export async function GET(req: NextRequest) {
    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });

  let teamId: string | null = null;
+  let keyAlias: string | null = null;

  if (user.isPlatform) {
-    // Admins may pass a specific teamId to inspect any tenant
    teamId = req.nextUrl.searchParams.get("teamId") ?? null;
+    keyAlias = req.nextUrl.searchParams.get("keyAlias") ?? null;
  }

-  // For customers (or admins without explicit teamId): resolve from their tenant
+  // For customers (or admins without explicit params): resolve from their tenant.
  if (!teamId) {
    const tenants = await listTenants();
    const orgTenant = tenants.find(
@@ -37,6 +50,13 @@ export async function GET(req: NextRequest) {
      );
    }
    teamId = orgTenant.status.litellmTeamId;
+
+    // If the operator has populated the per-tenant key alias, filter by it.
+    // Falling back to team-level (no alias) will return the org total, which
+    // is acceptable transitionally but means siblings' usage shows up here.
+    if (orgTenant.status.litellmKeyAlias) {
+      keyAlias = orgTenant.status.litellmKeyAlias;
+    }
  }

  // Month param: YYYY-MM, defaults to current month
@@ -55,7 +75,11 @@ export async function GET(req: NextRequest) {
  try {
    const teamInfo = await getTeamInfo(teamId);

-    // Fetch all pages
+    // Fetch all pages from the team. We always query at the team level —
+    // LiteLLM's /spend/logs/v2 doesn't filter by key_alias reliably across
+    // versions, so we paginate and post-filter in code. For pilot scale
+    // this is cheap; if a single team ever exceeds ~10k entries/month we
+    // can revisit.
    const allRequests: any[] = [];
    let page = 1;
    while (true) {
@@ -71,12 +95,26 @@ export async function GET(req: NextRequest) {
      page++;
    }

+    // Apply key_alias post-filter when scoping to a single tenant. Match
+    // `key_alias` (newer LiteLLM), `metadata.user_api_key_alias` (older
+    // builds nest it inside metadata), and finally `api_key_alias`.
+    const scoped = keyAlias
+      ? allRequests.filter((r) => {
+          const alias =
+            r.key_alias ??
+            r.metadata?.user_api_key_alias ??
+            r.api_key_alias ??
+            null;
+          return alias === keyAlias;
+        })
+      : allRequests;
+
    // Aggregate by day
    const byDay: Record<
      string,
      { inputTokens: number; outputTokens: number; spend: number }
    > = {};
-    for (const r of allRequests) {
+    for (const r of scoped) {
      const day = (r.startTime || r.endTime || "").slice(0, 10);
      if (!day) continue;
      if (!byDay[day])
@@ -90,25 +128,30 @@ export async function GET(req: NextRequest) {
      .sort(([a], [b]) => a.localeCompare(b))
      .map(([date, d]) => ({ date, ...d }));

-    const totalInput = allRequests.reduce(
+    const totalInput = scoped.reduce(
      (s, r) => s + (r.prompt_tokens || 0),
      0
    );
-    const totalOutput = allRequests.reduce(
+    const totalOutput = scoped.reduce(
      (s, r) => s + (r.completion_tokens || 0),
      0
    );
-    const totalSpend = allRequests.reduce((s, r) => s + (r.spend || 0), 0);
+    const totalSpend = scoped.reduce((s, r) => s + (r.spend || 0), 0);

    return NextResponse.json({
      teamId,
+      keyAlias, // null when not filtering — useful for the client to know it sees company-wide data
      month: monthParam,
      currentPeriod: {
        inputTokens: totalInput,
        outputTokens: totalOutput,
        totalSpend,
-        requestCount: allRequests.length,
+        requestCount: scoped.length,
      },
+      // Budget is always team-level (= company budget). `budget.spend`
+      // is the team total, not the per-key total — the customer wants
+      // to see "how much of our company budget is left", not just
+      // "how much has this one tenant cost".
      budget: {
        maxBudget: teamInfo?.team_info?.max_budget ?? null,
        spend: teamInfo?.team_info?.spend ?? 0,