Add Health and Spend for Admins

This commit is contained in:
2026-04-11 22:36:36 +02:00
parent fdb56490dd
commit 1edb5785e3
10 changed files with 667 additions and 126 deletions

View File

@@ -0,0 +1,92 @@
import { NextResponse } from "next/server";
import { requirePlatformRole } from "@/lib/session";
import { listTenants } from "@/lib/k8s";
import {
getLitellmHealth,
getGlobalSpend,
getPerTeamSpend,
} from "@/lib/litellm";
/**
 * Base URL of the vLLM server probed by {@link checkVllmHealth}.
 * Taken from the `VLLM_HEALTH_URL` environment variable when it is defined,
 * otherwise the hard-coded service address below is used.
 */
const VLLM_URL =
  process.env.VLLM_HEALTH_URL ?? "http://vllm-inference.inference.svc:8000";

/**
 * Probes `${VLLM_URL}/health` and reports whether vLLM answered successfully.
 *
 * The request is aborted after 5 seconds. This function never rejects: every
 * failure (non-2xx status, network error, timeout) is folded into the result.
 *
 * @returns `{ healthy: true }` on a 2xx response; otherwise
 *   `{ healthy: false, details }` where `details` is either `HTTP <status>`
 *   or a textual description of the thrown value.
 */
async function checkVllmHealth(): Promise<{
  healthy: boolean;
  details?: string;
}> {
  try {
    const res = await fetch(`${VLLM_URL}/health`, {
      signal: AbortSignal.timeout(5000),
    });
    if (res.ok) return { healthy: true };
    return { healthy: false, details: `HTTP ${res.status}` };
  } catch (e: unknown) {
    // Anything can be thrown, not only Error instances; narrow before reading
    // `.message` so that non-Error values still yield a useful description
    // instead of `undefined`.
    const details = e instanceof Error ? e.message : String(e);
    return { healthy: false, details };
  }
}
/**
 * Unwraps one `Promise.allSettled` entry.
 * Returns the fulfilled value, or logs the rejection reason and returns
 * `fallback` so that a failed dependency is not silently reported as empty.
 */
function settledOr<T, F>(
  result: PromiseSettledResult<T>,
  fallback: F,
  label: string,
): T | F {
  if (result.status === "fulfilled") return result.value;
  console.error(`[admin/health] ${label} failed:`, result.reason);
  return fallback;
}

/**
 * GET /api/admin/health
 * Returns system health overview for the admin panel.
 *
 * Responds with 403 `{ error: "Forbidden" }` when `requirePlatformRole()`
 * throws. Otherwise all data sources are queried in parallel and the response
 * is always produced, even if some of them fail:
 *
 * - `tenants.total` / `tenants.phases`: tenant count and a per-phase tally
 *   (suspended tenants are counted as `"Suspended"`, tenants without a phase
 *   as `"Pending"`); empty when the tenant listing fails.
 * - `spend.global`: global spend, `0` when it cannot be fetched.
 * - `spend.perTenant`: tenant name → spend, for tenants whose
 *   `status.litellmTeamId` has an entry in the per-team spend map.
 * - `services.litellm` / `services.vllm`: health results, or
 *   `{ healthy: false, details: "fetch failed" }` when the check rejected.
 */
export async function GET() {
  try {
    await requirePlatformRole();
  } catch {
    // Any failure of the role check is reported as "Forbidden".
    return NextResponse.json({ error: "Forbidden" }, { status: 403 });
  }

  // allSettled: one failing backend must not take the whole overview down.
  const [tenants, litellm, vllm, globalSpend, perTeamSpend] =
    await Promise.allSettled([
      listTenants(),
      getLitellmHealth(),
      checkVllmHealth(),
      getGlobalSpend(),
      getPerTeamSpend(),
    ]);

  const allTenants = settledOr(tenants, [], "listTenants");

  // Count tenants by phase
  const phaseCounts: Record<string, number> = {};
  for (const t of allTenants) {
    const phase = t.spec.suspend
      ? "Suspended"
      : t.status?.phase ?? "Pending";
    phaseCounts[phase] = (phaseCounts[phase] ?? 0) + 1;
  }

  // Build per-tenant spend map (tenantName → spend)
  const teamSpend = settledOr(
    perTeamSpend,
    new Map<string, number>(),
    "getPerTeamSpend",
  );
  const spendMap: Record<string, number> = {};
  for (const t of allTenants) {
    const teamId = t.status?.litellmTeamId;
    if (!teamId) continue;
    // Single lookup; `undefined` means the team has no recorded spend.
    const spend = teamSpend.get(teamId);
    if (spend !== undefined) {
      spendMap[t.metadata.name] = spend;
    }
  }

  const unreachable = { healthy: false, details: "fetch failed" };

  return NextResponse.json({
    tenants: {
      total: allTenants.length,
      phases: phaseCounts,
    },
    spend: {
      global: settledOr(globalSpend, 0, "getGlobalSpend"),
      perTenant: spendMap,
    },
    services: {
      litellm: settledOr(litellm, unreachable, "getLitellmHealth"),
      vllm: settledOr(vllm, unreachable, "checkVllmHealth"),
    },
  });
}