One LiteLLM team per company + virt keys
All checks were successful
Build and Push / build (push) Successful in 1m24s

This commit is contained in:
2026-04-26 21:21:02 +02:00
parent 1f48712e42
commit 7b22bc4087
7 changed files with 247 additions and 29 deletions

View File

@@ -0,0 +1,64 @@
// Smoke-test for the FindKeyByAlias parsing logic — runs the JSON
// permutations LiteLLM has been seen to emit through the unmarshal
// paths and confirms each ends up at the expected outcome.
//
// Since the operator can't run inside this sandbox, this is a
// JS port of the parsing flow. It exercises decisions the Go code
// makes line-for-line.
// Fixture table for the smoke test. Each entry carries:
//   name     – label printed next to the PASS/FAIL verdict
//   body     – an already-parsed response body handed to findKeyByAlias
//   expected – the token the lookup must return ("" means "no usable key")
const cases = [
  // Object-shaped entries: the matching alias yields its token.
  {
    name: "newer object shape, alias matches",
    body: {
      keys: [
        { token: "tk-1", key_alias: "acme-abc12345" },
        { token: "tk-2", key_alias: "beta-def67890" },
      ],
    },
    expected: "tk-1",
  },
  // Object-shaped entries, but none carries the alias we look for.
  {
    name: "newer object shape, alias does not match",
    body: {
      keys: [{ token: "tk-2", key_alias: "beta-def67890" }],
    },
    expected: "",
  },
  // Nothing to search at all.
  {
    name: "newer object shape, empty keys array",
    body: { keys: [] },
    expected: "",
  },
  // Bare strings carry no alias, so they can never match.
  {
    name: "older string shape — cannot filter, return empty",
    body: { keys: ["sk-abc", "sk-def"] },
    expected: "",
  },
  // The alias matches, but without a token there is nothing to return.
  {
    name: "matching alias but missing token field",
    body: {
      keys: [{ key_alias: "acme-abc12345" }],
    },
    expected: "",
  },
];
/**
 * Look up the token of the key whose `key_alias` equals `keyAlias`.
 *
 * @param body      Parsed response body; expected to look like `{ keys: [...] }`.
 * @param keyAlias  Alias to search for.
 * @returns The `token` of the first entry that has both a matching alias and
 *          a truthy token; `""` when `body` is falsy, `body.keys` is not an
 *          array, no entry qualifies, or anything throws while inspecting
 *          the body.
 */
function findKeyByAlias(body, keyAlias) {
  try {
    // A missing body or a non-array `keys` behaves like an empty list.
    const entries = body && Array.isArray(body.keys) ? body.keys : [];
    for (let i = 0; i < entries.length; i++) {
      const entry = entries[i];
      // Non-object entries (e.g. bare token strings) carry no alias to compare.
      const isRecord = entry !== null && typeof entry === "object";
      if (!isRecord || entry.key_alias !== keyAlias) continue;
      // A matching alias without a token is unusable; keep scanning.
      const token = entry.token;
      if (token) return token;
    }
    return "";
  } catch {
    return "";
  }
}
// Run every fixture through findKeyByAlias with a fixed alias, print one
// PASS/FAIL line per case, then a summary line.
let pass = 0, fail = 0;
for (const c of cases) {
  const got = findKeyByAlias(c.body, "acme-abc12345");
  const ok = got === c.expected;
  console.log(`${ok ? "PASS" : "FAIL"} got="${got}" want="${c.expected}" [${c.name}]`);
  if (ok) pass++; else fail++;
}
console.log(`\n${pass} pass, ${fail} fail`);
// Set the exit code instead of calling process.exit(): a forced exit can cut
// off stdout writes that have not been flushed yet, losing the report above.
// The script has no pending work, so the process ends on its own with the
// same status (0 when everything passed, 1 otherwise).
process.exitCode = fail === 0 ? 0 : 1;

View File

@@ -41,11 +41,18 @@ export default async function TenantDetailPage({
); );
const channelUsers = tenant.spec.channelUsers || {}; const channelUsers = tenant.spec.channelUsers || {};
// Admins inspecting another tenant's usage: pass teamId explicitly. // Admins inspecting another tenant's usage: pass teamId AND keyAlias so
// Customers viewing their own: no teamId, backend resolves from session. // the backend filters spend logs by this specific tenant's virtual key.
// Without keyAlias the response would include sibling tenants in the
// same org, since teams are now shared (Slice 2).
// Customers viewing their own: pass nothing — backend resolves both
// from the session-bound tenant.
const usageTeamId = user.isPlatform const usageTeamId = user.isPlatform
? tenant.status?.litellmTeamId || undefined ? tenant.status?.litellmTeamId || undefined
: undefined; : undefined;
const usageKeyAlias = user.isPlatform
? tenant.status?.litellmKeyAlias || undefined
: undefined;
return ( return (
<div> <div>
@@ -81,7 +88,7 @@ export default async function TenantDetailPage({
<h2 className="text-xs font-semibold uppercase tracking-wider text-text-muted mb-3"> <h2 className="text-xs font-semibold uppercase tracking-wider text-text-muted mb-3">
{t("usage")} {t("usage")}
</h2> </h2>
<UsageDisplay teamId={usageTeamId} /> <UsageDisplay teamId={usageTeamId} keyAlias={usageKeyAlias} />
</section> </section>
{/* Packages */} {/* Packages */}

View File

@@ -4,6 +4,7 @@ import { listTenants } from "@/lib/k8s";
import { import {
getLitellmHealth, getLitellmHealth,
getGlobalSpend, getGlobalSpend,
getPerKeySpend,
getPerTeamSpend, getPerTeamSpend,
} from "@/lib/litellm"; } from "@/lib/litellm";
@@ -28,6 +29,17 @@ async function checkVllmHealth(): Promise<{
/** /**
* GET /api/admin/health * GET /api/admin/health
* Returns system health overview for the admin panel. * Returns system health overview for the admin panel.
*
* Slice 2 spend layout
* --------------------
* - `spend.global` — total across all teams (LiteLLM-reported)
* - `spend.perTenant[name]` — per-tenant CHF, derived from the per-key
* spend map keyed by `litellmKeyAlias`. Only
* populated for tenants whose status carries
* an alias (post-Slice-2 reconciled CRs).
* - `spend.perOrg[teamId]` — company-level total (= LiteLLM team total).
* Useful for the admin overview to see
* spend-per-customer at a glance.
*/ */
export async function GET() { export async function GET() {
try { try {
@@ -36,17 +48,17 @@ export async function GET() {
return NextResponse.json({ error: "Forbidden" }, { status: 403 }); return NextResponse.json({ error: "Forbidden" }, { status: 403 });
} }
const [tenants, litellm, vllm, globalSpend, perTeamSpend] = const [tenants, litellm, vllm, globalSpend, perKeySpend, perTeamSpend] =
await Promise.allSettled([ await Promise.allSettled([
listTenants(), listTenants(),
getLitellmHealth(), getLitellmHealth(),
checkVllmHealth(), checkVllmHealth(),
getGlobalSpend(), getGlobalSpend(),
getPerKeySpend(),
getPerTeamSpend(), getPerTeamSpend(),
]); ]);
const allTenants = const allTenants = tenants.status === "fulfilled" ? tenants.value : [];
tenants.status === "fulfilled" ? tenants.value : [];
// Count tenants by phase // Count tenants by phase
const phaseCounts: Record<string, number> = {}; const phaseCounts: Record<string, number> = {};
@@ -57,15 +69,27 @@ export async function GET() {
phaseCounts[phase] = (phaseCounts[phase] || 0) + 1; phaseCounts[phase] = (phaseCounts[phase] || 0) + 1;
} }
// Build per-tenant spend map (tenantName → spend) // Build per-tenant spend map (tenantName → spend) from the per-key map.
const spendMap: Record<string, number> = {}; // Tenants without a `litellmKeyAlias` in status are skipped — they
// simply won't appear in this map until they've been reconciled by
// the Slice-2 operator.
const keySpend =
perKeySpend.status === "fulfilled" ? perKeySpend.value : new Map();
const tenantSpend: Record<string, number> = {};
for (const t of allTenants) {
const alias = t.status?.litellmKeyAlias;
if (alias && keySpend.has(alias)) {
tenantSpend[t.metadata.name] = keySpend.get(alias)!;
}
}
// Build per-org spend map (teamId → spend). Multiple tenants of the
// same org share a teamId, so the same number appears for each.
const teamSpend = const teamSpend =
perTeamSpend.status === "fulfilled" ? perTeamSpend.value : new Map(); perTeamSpend.status === "fulfilled" ? perTeamSpend.value : new Map();
for (const t of allTenants) { const orgSpend: Record<string, number> = {};
const teamId = t.status?.litellmTeamId; for (const [teamId, spend] of teamSpend.entries()) {
if (teamId && teamSpend.has(teamId)) { orgSpend[teamId] = spend;
spendMap[t.metadata.name] = teamSpend.get(teamId)!;
}
} }
return NextResponse.json({ return NextResponse.json({
@@ -76,7 +100,8 @@ export async function GET() {
spend: { spend: {
global: global:
globalSpend.status === "fulfilled" ? globalSpend.value : 0, globalSpend.status === "fulfilled" ? globalSpend.value : 0,
perTenant: spendMap, perTenant: tenantSpend,
perOrg: orgSpend,
}, },
services: { services: {
litellm: litellm:

View File

@@ -7,9 +7,21 @@ import { safeError } from "@/lib/errors";
/** /**
* GET /api/usage * GET /api/usage
* *
* Customers: teamId is resolved server-side from the tenant matching the * Customers: tenant resolved server-side from the user's orgId. The
* user's orgId. No client-supplied teamId accepted. * response is filtered by the tenant's `litellmKeyAlias` so
* Platform admins: may pass ?teamId=... to inspect any tenant's usage. * sibling tenants in the same org don't bleed into the total.
* Platform admins: may pass ?teamId=... to inspect any team. They may
* also pass ?keyAlias=... to scope to a single tenant.
*
* Slice 2 note
* ------------
* LiteLLM teams are now shared across all tenants of an org. The team's
* `/team/info` budget is the *company* budget; the per-tenant numbers
* come from filtering spend logs by `key_alias`. If a tenant has no
* `litellmKeyAlias` in status (transitional state right after upgrade,
* before the operator has reconciled), we fall back to team-level
* filtering — the numbers will be slightly inflated for that one
* reconcile cycle.
*/ */
export async function GET(req: NextRequest) { export async function GET(req: NextRequest) {
const user = await getSessionUser(); const user = await getSessionUser();
@@ -17,13 +29,14 @@ export async function GET(req: NextRequest) {
return NextResponse.json({ error: "Unauthorized" }, { status: 401 }); return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
let teamId: string | null = null; let teamId: string | null = null;
let keyAlias: string | null = null;
if (user.isPlatform) { if (user.isPlatform) {
// Admins may pass a specific teamId to inspect any tenant
teamId = req.nextUrl.searchParams.get("teamId") ?? null; teamId = req.nextUrl.searchParams.get("teamId") ?? null;
keyAlias = req.nextUrl.searchParams.get("keyAlias") ?? null;
} }
// For customers (or admins without explicit teamId): resolve from their tenant // For customers (or admins without explicit params): resolve from their tenant.
if (!teamId) { if (!teamId) {
const tenants = await listTenants(); const tenants = await listTenants();
const orgTenant = tenants.find( const orgTenant = tenants.find(
@@ -37,6 +50,13 @@ export async function GET(req: NextRequest) {
); );
} }
teamId = orgTenant.status.litellmTeamId; teamId = orgTenant.status.litellmTeamId;
// If the operator has populated the per-tenant key alias, filter by it.
// Falling back to team-level (no alias) will return the org total, which
// is acceptable transitionally but means siblings' usage shows up here.
if (orgTenant.status.litellmKeyAlias) {
keyAlias = orgTenant.status.litellmKeyAlias;
}
} }
// Month param: YYYY-MM, defaults to current month // Month param: YYYY-MM, defaults to current month
@@ -55,7 +75,11 @@ export async function GET(req: NextRequest) {
try { try {
const teamInfo = await getTeamInfo(teamId); const teamInfo = await getTeamInfo(teamId);
// Fetch all pages // Fetch all pages from the team. We always query at the team level —
// LiteLLM's /spend/logs/v2 doesn't filter by key_alias reliably across
// versions, so we paginate and post-filter in code. For pilot scale
// this is cheap; if a single team ever exceeds ~10k entries/month we
// can revisit.
const allRequests: any[] = []; const allRequests: any[] = [];
let page = 1; let page = 1;
while (true) { while (true) {
@@ -71,12 +95,26 @@ export async function GET(req: NextRequest) {
page++; page++;
} }
// Apply key_alias post-filter when scoping to a single tenant. Match
// both `key_alias` (newer LiteLLM) and `metadata.user_api_key_alias`
// (older builds nest it inside metadata).
const scoped = keyAlias
? allRequests.filter((r) => {
const alias =
r.key_alias ??
r.metadata?.user_api_key_alias ??
r.api_key_alias ??
null;
return alias === keyAlias;
})
: allRequests;
// Aggregate by day // Aggregate by day
const byDay: Record< const byDay: Record<
string, string,
{ inputTokens: number; outputTokens: number; spend: number } { inputTokens: number; outputTokens: number; spend: number }
> = {}; > = {};
for (const r of allRequests) { for (const r of scoped) {
const day = (r.startTime || r.endTime || "").slice(0, 10); const day = (r.startTime || r.endTime || "").slice(0, 10);
if (!day) continue; if (!day) continue;
if (!byDay[day]) if (!byDay[day])
@@ -90,25 +128,30 @@ export async function GET(req: NextRequest) {
.sort(([a], [b]) => a.localeCompare(b)) .sort(([a], [b]) => a.localeCompare(b))
.map(([date, d]) => ({ date, ...d })); .map(([date, d]) => ({ date, ...d }));
const totalInput = allRequests.reduce( const totalInput = scoped.reduce(
(s, r) => s + (r.prompt_tokens || 0), (s, r) => s + (r.prompt_tokens || 0),
0 0
); );
const totalOutput = allRequests.reduce( const totalOutput = scoped.reduce(
(s, r) => s + (r.completion_tokens || 0), (s, r) => s + (r.completion_tokens || 0),
0 0
); );
const totalSpend = allRequests.reduce((s, r) => s + (r.spend || 0), 0); const totalSpend = scoped.reduce((s, r) => s + (r.spend || 0), 0);
return NextResponse.json({ return NextResponse.json({
teamId, teamId,
keyAlias, // null when not filtering — useful for the client to know it sees company-wide data
month: monthParam, month: monthParam,
currentPeriod: { currentPeriod: {
inputTokens: totalInput, inputTokens: totalInput,
outputTokens: totalOutput, outputTokens: totalOutput,
totalSpend, totalSpend,
requestCount: allRequests.length, requestCount: scoped.length,
}, },
// Budget is always team-level (= company budget). Spend reported
// here is the team total, not the per-key total — the customer
// wants to see "how much of our company budget is left", not just
// "how much has this one tenant cost".
budget: { budget: {
maxBudget: teamInfo?.team_info?.max_budget ?? null, maxBudget: teamInfo?.team_info?.max_budget ?? null,
spend: teamInfo?.team_info?.spend ?? 0, spend: teamInfo?.team_info?.spend ?? 0,

View File

@@ -94,10 +94,20 @@ function UsageChart({ data }: { data: DailyUsage[] }) {
/** /**
* Usage display widget. * Usage display widget.
* *
* - Customers: don't pass teamId — the backend resolves it from the session. * - Customers: don't pass teamId or keyAlias — the backend resolves both
* - Admins inspecting a specific tenant: pass teamId to override. * from the session-bound tenant.
* - Admins inspecting a specific tenant: pass `teamId` (the org-level
* LiteLLM team id) AND `keyAlias` (the tenant's virtual-key alias).
* Without `keyAlias`, the response includes spend from sibling tenants
* in the same org, since teams are shared since Slice 2.
*/ */
export function UsageDisplay({ teamId }: { teamId?: string | null }) { export function UsageDisplay({
teamId,
keyAlias,
}: {
teamId?: string | null;
keyAlias?: string | null;
}) {
const t = useTranslations("usage"); const t = useTranslations("usage");
const [month, setMonth] = useState(getCurrentMonth); const [month, setMonth] = useState(getCurrentMonth);
const [data, setData] = useState<UsageData | null>(null); const [data, setData] = useState<UsageData | null>(null);
@@ -114,13 +124,16 @@ export function UsageDisplay({ teamId }: { teamId?: string | null }) {
if (teamId) { if (teamId) {
params.set("teamId", teamId); params.set("teamId", teamId);
} }
if (keyAlias) {
params.set("keyAlias", keyAlias);
}
fetch(`/api/usage?${params}`) fetch(`/api/usage?${params}`)
.then((res) => { if (!res.ok) throw new Error(`${res.status}`); return res.json(); }) .then((res) => { if (!res.ok) throw new Error(`${res.status}`); return res.json(); })
.then(setData) .then(setData)
.catch((e) => setError(e.message)) .catch((e) => setError(e.message))
.finally(() => setLoading(false)); .finally(() => setLoading(false));
}, [teamId, month]); }, [teamId, keyAlias, month]);
useEffect(() => { fetchUsage(); }, [fetchUsage]); useEffect(() => { fetchUsage(); }, [fetchUsage]);

View File

@@ -91,6 +91,10 @@ export async function getGlobalSpend(): Promise<number> {
/** /**
* Fetch per-team spend as a map: teamId → spend (CHF). * Fetch per-team spend as a map: teamId → spend (CHF).
* Uses /team/list which includes current spend per team. * Uses /team/list which includes current spend per team.
*
* Since Slice 2, a "team" is the company-level budget shared across all
* tenants of the same ZITADEL org. So this map gives company totals, not
* per-tenant spend. For per-tenant attribution, use {@link getPerKeySpend}.
*/ */
export async function getPerTeamSpend(): Promise<Map<string, number>> { export async function getPerTeamSpend(): Promise<Map<string, number>> {
const teams = await listTeams(); const teams = await listTeams();
@@ -102,3 +106,54 @@ export async function getPerTeamSpend(): Promise<Map<string, number>> {
} }
return map; return map;
} }
/**
 * Fetch per-virtual-key spend as a map: keyAlias → spend (CHF).
 *
 * Since Slice 2, each PiecedTenant CR owns one virtual key under its
 * org's team, with `key_alias = tenant.metadata.name`. Filtering by the
 * key alias is how we get genuinely per-tenant spend.
 *
 * Implementation
 * --------------
 * Calls `/key/list?return_full_object=true&include_team_keys=true`,
 * which returns objects with `spend` and `key_alias`. The endpoint is
 * paginated, so every page is requested (at the largest page size) and
 * merged; stopping at the first page would silently drop tenants once
 * the number of keys exceeds one page.
 * NOTE(review): `page` / `size` / `total_pages` follow the upstream
 * LiteLLM `/key/list` contract — confirm against the deployed version.
 * When the response carries no `total_pages`, only the first page is read.
 *
 * Older LiteLLM builds may return raw token strings instead — we degrade
 * gracefully to an empty map in that case rather than throwing, since the
 * admin health page should still render even if per-tenant numbers are
 * temporarily unavailable. If a later page fails, the entries collected
 * so far are returned.
 *
 * @returns Map<keyAlias, spend>. May be empty if the LiteLLM build
 *          doesn't expose key-alias info; callers must handle that.
 */
export async function getPerKeySpend(): Promise<Map<string, number>> {
  // Largest page size accepted upstream; keeps the number of round trips low.
  const PAGE_SIZE = 100;
  // Hard stop so a bogus `total_pages` can never loop forever.
  const MAX_PAGES = 1000;

  const map = new Map<string, number>();
  try {
    let page = 1;
    let totalPages = 1;
    do {
      const data = await litellmFetch(
        `/key/list?return_full_object=true&include_team_keys=true&page=${page}&size=${PAGE_SIZE}`
      );
      // Response shape: { keys: [ { key_alias, spend, token, ... } ] }
      // or sometimes { data: [...] }, or raw arrays. Be tolerant.
      const keys: any[] = Array.isArray(data?.keys)
        ? data.keys
        : Array.isArray(data?.data)
          ? data.data
          : Array.isArray(data)
            ? data
            : [];
      // An empty page means there is nothing further to read.
      if (keys.length === 0) break;

      for (const k of keys) {
        // Skip raw-string entries from older API shapes — we can't attribute them.
        if (typeof k !== "object" || k === null) continue;
        const alias = k.key_alias ?? k.keyAlias;
        if (typeof alias !== "string" || !alias) continue;
        const spend =
          typeof k.spend === "number" ? k.spend : Number(k.spend) || 0;
        map.set(alias, spend);
      }

      // Without a usable page count, treat the current page as the last one.
      const reportedPages = Number(data?.total_pages);
      totalPages =
        Number.isFinite(reportedPages) && reportedPages > 0
          ? reportedPages
          : page;
      page++;
    } while (page <= totalPages && page <= MAX_PAGES);
  } catch (e) {
    console.warn("getPerKeySpend failed, returning empty map:", e);
  }
  return map;
}

View File

@@ -37,7 +37,18 @@ export interface PiecedTenantStatus {
phase: "Pending" | "Provisioning" | "Running" | "Ready" | "Error" | "Deleting"; phase: "Pending" | "Provisioning" | "Running" | "Ready" | "Error" | "Deleting";
message?: string; message?: string;
observedGeneration?: number; observedGeneration?: number;
/**
* Org-level LiteLLM team id (since Slice 2 — shared across all tenants
* of the same ZITADEL org). For per-tenant spend attribution use
* `litellmKeyAlias`, not this field.
*/
litellmTeamId?: string; litellmTeamId?: string;
/**
* Per-tenant LiteLLM virtual-key alias (set to the CR name). Used by
* the portal to filter spend logs to a single tenant within a shared
* org-level team.
*/
litellmKeyAlias?: string;
tenantNamespace?: string; tenantNamespace?: string;
enabledPackages?: string[]; enabledPackages?: string[];
conditions?: Array<{ conditions?: Array<{