118 lines
3.6 KiB
TypeScript
118 lines
3.6 KiB
TypeScript
import { NextResponse } from "next/server";
|
|
import { requirePlatformRole } from "@/lib/session";
|
|
import { listTenants } from "@/lib/k8s";
|
|
import {
|
|
getLitellmHealth,
|
|
getGlobalSpend,
|
|
getPerKeySpend,
|
|
getPerTeamSpend,
|
|
} from "@/lib/litellm";
|
|
|
|
/**
 * Base URL of the vLLM service that is probed for health. Taken from the
 * `VLLM_HEALTH_URL` environment variable when it is set; otherwise the
 * hard-coded default below is used.
 */
const VLLM_URL =
  process.env.VLLM_HEALTH_URL ?? "http://vllm-inference.inference.svc:8000";

/** Upper bound, in milliseconds, for a single vLLM health probe. */
const VLLM_HEALTH_TIMEOUT_MS = 5000;

/**
 * Probes `${VLLM_URL}/health` and reports whether vLLM answered successfully.
 *
 * This function never rejects: every failure (network error, timeout abort,
 * non-2xx status) is folded into the returned object so callers can embed
 * the result directly in a status payload.
 *
 * @returns `{ healthy: true }` when the response status is 2xx; otherwise
 *   `{ healthy: false, details }`, where `details` is `HTTP <status>` for a
 *   non-2xx response or the error text for a thrown failure.
 */
async function checkVllmHealth(): Promise<{
  healthy: boolean;
  details?: string;
}> {
  try {
    const res = await fetch(`${VLLM_URL}/health`, {
      // Abort the probe if vLLM does not answer within the time budget.
      signal: AbortSignal.timeout(VLLM_HEALTH_TIMEOUT_MS),
    });
    if (res.ok) return { healthy: true };
    return { healthy: false, details: `HTTP ${res.status}` };
  } catch (e: unknown) {
    // Anything can be thrown in JavaScript; only `Error` instances are
    // guaranteed to carry `.message`, so fall back to string coercion to
    // avoid reporting `details: undefined` and losing the failure reason.
    return {
      healthy: false,
      details: e instanceof Error ? e.message : String(e),
    };
  }
}
|
|
|
|
/**
 * Unwraps one `Promise.allSettled` result, substituting `fallback` when the
 * underlying call rejected. The rejection reason is logged so that a failed
 * dependency is not silently reported as "empty" or "zero".
 */
function settledOr<T, F>(
  result: PromiseSettledResult<T>,
  fallback: F,
  source: string,
): T | F {
  if (result.status === "fulfilled") return result.value;
  console.error(`[admin/health] ${source} failed:`, result.reason);
  return fallback;
}

/**
 * GET /api/admin/health
 * Returns system health overview for the admin panel.
 *
 * Responds with 403 `{ error: "Forbidden" }` when `requirePlatformRole()`
 * throws. Otherwise all data sources are queried in parallel and the
 * response is best-effort: a source that fails is logged and replaced by a
 * neutral default (no tenants, zero spend, empty maps, or an unhealthy
 * service entry) instead of failing the whole request.
 *
 * Response shape
 * --------------
 *  - `tenants.total` / `tenants.phases` — tenant count and count per phase.
 *    A tenant with `spec.suspend` set is counted as "Suspended"; a tenant
 *    without a status phase is counted as "Pending".
 *  - `services.litellm` / `services.vllm` — health of each backing service.
 *
 * Slice 2 spend layout
 * --------------------
 *  - `spend.global`            — total across all teams (LiteLLM-reported)
 *  - `spend.perTenant[name]`   — per-tenant CHF, derived from the per-key
 *                                spend map keyed by `litellmKeyAlias`. Only
 *                                populated for tenants whose status carries
 *                                an alias (post-Slice-2 reconciled CRs).
 *  - `spend.perOrg[teamId]`    — company-level total (= LiteLLM team total).
 *                                Useful for the admin overview to see
 *                                spend-per-customer at a glance.
 */
export async function GET() {
  try {
    await requirePlatformRole();
  } catch {
    return NextResponse.json({ error: "Forbidden" }, { status: 403 });
  }

  // allSettled (not all): one failing dependency must not take down the
  // whole overview.
  const [tenants, litellm, vllm, globalSpend, perKeySpend, perTeamSpend] =
    await Promise.allSettled([
      listTenants(),
      getLitellmHealth(),
      checkVllmHealth(),
      getGlobalSpend(),
      getPerKeySpend(),
      getPerTeamSpend(),
    ]);

  const allTenants = settledOr(tenants, [], "listTenants");

  // Count tenants by phase
  const phaseCounts: Record<string, number> = {};
  for (const t of allTenants) {
    const phase = t.spec.suspend ? "Suspended" : t.status?.phase ?? "Pending";
    phaseCounts[phase] = (phaseCounts[phase] ?? 0) + 1;
  }

  // Build per-tenant spend map (tenantName → spend) from the per-key map.
  // Tenants without a `litellmKeyAlias` in status, or whose alias has no
  // entry in the per-key map, are skipped — they simply won't appear in
  // this map until they've been reconciled by the Slice-2 operator.
  const keySpend = settledOr(
    perKeySpend,
    new Map<string, number>(),
    "getPerKeySpend",
  );
  const tenantSpend: Record<string, number> = {};
  for (const t of allTenants) {
    const alias = t.status?.litellmKeyAlias;
    if (!alias) continue;
    const spend = keySpend.get(alias);
    if (spend !== undefined) {
      tenantSpend[t.metadata.name] = spend;
    }
  }

  // Build per-org spend map (teamId → spend): one entry per LiteLLM team.
  // Per the original design note, multiple tenants of the same org share a
  // teamId, so they all map to the same org-level number.
  const teamSpend = settledOr(
    perTeamSpend,
    new Map<string, number>(),
    "getPerTeamSpend",
  );
  const orgSpend: Record<string, number> = Object.fromEntries(teamSpend);

  // Shared placeholder for a service whose health call itself rejected.
  const unreachable = { healthy: false, details: "fetch failed" };

  return NextResponse.json({
    tenants: {
      total: allTenants.length,
      phases: phaseCounts,
    },
    spend: {
      global: settledOr(globalSpend, 0, "getGlobalSpend"),
      perTenant: tenantSpend,
      perOrg: orgSpend,
    },
    services: {
      litellm: settledOr(litellm, unreachable, "getLitellmHealth"),
      vllm: settledOr(vllm, unreachable, "checkVllmHealth"),
    },
  });
}
|