// pieced-portal/src/app/api/usage/route.ts

import { NextRequest, NextResponse } from "next/server";
import { getSessionUser } from "@/lib/session";
import { listTenants } from "@/lib/k8s";
import { listVisibleTenants } from "@/lib/visibility";
import {
  getTeamInfo,
  getTeamSpendLogsV2,
  findKeyByAlias,
} from "@/lib/litellm";
import { safeError } from "@/lib/errors";

/** Page size requested from `/spend/logs/v2`; LiteLLM caps `page_size` at 100. */
const SPEND_LOG_PAGE_SIZE = 100;

/**
 * Defensive cap on pagination. A pathological response with a bogus
 * `total_pages` shouldn't be able to spin us forever. 50 pages × 100 =
 * 5000 entries/month/tenant is well above any realistic usage at pilot
 * scale.
 */
const MAX_SPEND_LOG_PAGES = 50;

/** `YYYY-MM`; a single-digit month (`YYYY-M`) is tolerated. */
const MONTH_PARAM_PATTERN = /^(\d{4})-(\d{1,2})$/;

/** Resolved reporting window for one calendar month. */
interface MonthRange {
  /** The month label echoed back to the caller (as supplied, or the default). */
  month: string;
  /** First day of the month, `YYYY-MM-DD`. */
  startStr: string;
  /** Last day of the month, `YYYY-MM-DD`. */
  endStr: string;
}

/** The fields of a spend-log row that this route reads. */
interface SpendLogRow {
  startTime?: string | null;
  endTime?: string | null;
  prompt_tokens?: number | null;
  completion_tokens?: number | null;
  spend?: number | null;
}

/** Token and spend counters, used both per day and for the whole period. */
interface UsageTotals {
  inputTokens: number;
  outputTokens: number;
  spend: number;
}

/**
 * Turns the `?month=` parameter into first/last-day date strings.
 *
 * The strings are assembled from the parsed year and month rather than
 * by round-tripping a local-time `Date` through `toISOString()`: that
 * conversion is to UTC and moves both bounds back one day whenever the
 * server's timezone is ahead of UTC.
 *
 * @param monthParam Raw query value; `null` or empty selects the month
 *   of `now` in server-local time.
 * @param now Clock used for the default month (injectable for tests).
 * @returns The resolved range, or `null` when the value is not a valid
 *   `YYYY-MM` month.
 */
function resolveMonthRange(
  monthParam: string | null,
  now: Date = new Date()
): MonthRange | null {
  const month =
    monthParam ||
    `${now.getFullYear()}-${String(now.getMonth() + 1).padStart(2, "0")}`;

  const match = MONTH_PARAM_PATTERN.exec(month);
  if (!match) return null;

  const year = Number(match[1]);
  const monthNumber = Number(match[2]);
  if (monthNumber < 1 || monthNumber > 12) return null;

  // Day 0 of the following month is the last day of this one. Computed
  // and read back in UTC so the server timezone cannot influence it.
  const lastDay = new Date(Date.UTC(year, monthNumber, 0)).getUTCDate();
  const prefix = `${match[1]}-${String(monthNumber).padStart(2, "0")}`;

  return {
    month,
    startStr: `${prefix}-01`,
    endStr: `${prefix}-${String(lastDay).padStart(2, "0")}`,
  };
}

/**
 * Aggregates spend-log rows into period totals and per-day buckets in a
 * single pass.
 *
 * Rows with neither `startTime` nor `endTime` still count toward the
 * totals but cannot be placed in a day bucket. The day is the first 10
 * characters of the timestamp string.
 *
 * @param rows Spend-log rows as returned by LiteLLM.
 * @returns Period totals plus daily buckets sorted by date ascending.
 */
function summarizeUsage(rows: readonly SpendLogRow[]): {
  totals: UsageTotals;
  dailyUsage: Array<{ date: string } & UsageTotals>;
} {
  const totals: UsageTotals = { inputTokens: 0, outputTokens: 0, spend: 0 };
  const byDay = new Map<string, UsageTotals>();

  for (const row of rows) {
    const inputTokens = row.prompt_tokens || 0;
    const outputTokens = row.completion_tokens || 0;
    const spend = row.spend || 0;

    totals.inputTokens += inputTokens;
    totals.outputTokens += outputTokens;
    totals.spend += spend;

    const day = (row.startTime || row.endTime || "").slice(0, 10);
    if (!day) continue;

    let bucket = byDay.get(day);
    if (!bucket) {
      bucket = { inputTokens: 0, outputTokens: 0, spend: 0 };
      byDay.set(day, bucket);
    }
    bucket.inputTokens += inputTokens;
    bucket.outputTokens += outputTokens;
    bucket.spend += spend;
  }

  const dailyUsage = [...byDay.entries()]
    .sort(([a], [b]) => a.localeCompare(b))
    .map(([date, bucket]) => ({ date, ...bucket }));

  return { totals, dailyUsage };
}

/**
 * Budget left after `spend`, or `null` when no cap is configured.
 *
 * A cap of `0` is a real cap (nothing may be spent), so the check is
 * for null/undefined rather than truthiness.
 */
function remainingBudget(
  maxBudget: number | null | undefined,
  spend: number | null | undefined
): number | null {
  return maxBudget == null ? null : maxBudget - (spend ?? 0);
}

/**
 * GET /api/usage
 *
 * Per-tenant spend/token usage for a given month.
 *
 * Query parameters
 * ----------------
 *  - `tenant`   PiecedTenant name (canonical path, see below).
 *  - `teamId`   LiteLLM team id (platform admins only).
 *  - `keyAlias` Optional key alias filter used with `teamId`.
 *  - `month`    `YYYY-MM`; defaults to the current month.
 *
 * Resolution rules (in priority order)
 * ------------------------------------
 *  1. `?tenant=<name>` query param — the canonical path. The route
 *     looks up the PiecedTenant CR by name, runs it through the
 *     viewer's visibility filter, and reads `status.litellmTeamId` +
 *     `status.litellmKeyAlias`. This is what the tenant-detail page
 *     calls for both customers and admins.
 *  2. `?teamId=<id>` (+ optional `?keyAlias=<alias>`) — admin escape
 *     hatch for debugging across orgs (e.g. opening the platform
 *     panel without a specific tenant in mind). Platform-only;
 *     ignored for customer sessions.
 *  3. No params — 400. We deliberately do NOT fall back to "the
 *     first visible tenant". Bug 19: that fallback meant siblings
 *     in the same org showed identical numbers because the API
 *     always picked the same "first" tenant regardless of which
 *     detail page the customer was viewing. Forcing callers to be
 *     explicit makes the bug structurally impossible to reintroduce.
 *
 * Filtering
 * ---------
 * LiteLLM's `/spend/logs/v2` accepts a server-side `key_alias` filter.
 * We pass it through directly — no more "fetch all team pages and
 * post-filter in JS" (which was O(team_total) memory per request and
 * masked the routing bug above by being slow enough that nobody
 * noticed which alias was actually being used).
 *
 * The team-level budget is still surfaced as the *org* budget, since
 * teams are org-scoped post-Slice-2. That's intentional: the customer
 * sees "your company has X budget remaining" alongside "this tenant
 * cost Y this month".
 *
 * Responses
 * ---------
 *  - 200 usage payload (`currentPeriod`, `budget`, `rateLimits`,
 *        `dailyUsage`, plus the resolved `teamId`/`keyAlias`/`month`).
 *  - 400 no tenant/team specified, or `month` is not a valid `YYYY-MM`.
 *  - 401 no session.
 *  - 404 tenant does not exist or is not visible to the caller.
 *  - 409 tenant exists but has no `litellmTeamId` yet.
 *  - 500 a LiteLLM call failed.
 */
export async function GET(req: NextRequest) {
  const user = await getSessionUser();
  if (!user) {
    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
  }

  const query = req.nextUrl.searchParams;
  const tenantName = query.get("tenant");
  let teamId: string | null = null;
  let keyAlias: string | null = null;

  if (tenantName) {
    // Path 1: resolve from tenant name with visibility check.
    //
    // listVisibleTenants enforces the same visibility rules as every
    // other read endpoint:
    //   - platform admins see everything
    //   - owners see all tenants in their org
    //   - users see only the tenants they're assigned to (Slice 6)
    //
    // Filtering through that list rather than reading the CR directly
    // means a malicious caller can't probe arbitrary tenant names to
    // learn what exists in other orgs.
    const allTenants = await listTenants();
    const visible = await listVisibleTenants(user, allTenants);
    const tenant = visible.find((t) => t.metadata.name === tenantName);

    if (!tenant) {
      return NextResponse.json(
        { error: "Tenant not found or not accessible" },
        { status: 404 }
      );
    }
    if (!tenant.status?.litellmTeamId) {
      // Tenant exists but the operator hasn't reconciled it yet.
      // Common right after onboarding; the customer should see a
      // friendly empty state, not a 500.
      return NextResponse.json(
        { error: "Tenant is still provisioning, no usage data yet" },
        { status: 409 }
      );
    }
    teamId = tenant.status.litellmTeamId;
    // litellmKeyAlias is set by the operator's LiteLLM reconcile step
    // alongside litellmTeamId, so if teamId is present this should be
    // too. Defensive fallback to team-level if missing — in that case
    // the customer briefly sees company totals until the next operator
    // reconcile, which is better than 500.
    keyAlias = tenant.status.litellmKeyAlias ?? null;
  } else if (user.isPlatform) {
    // Path 2: admin escape hatch.
    teamId = query.get("teamId");
    // A blank `?keyAlias=` means "no filter", not "filter by empty alias".
    keyAlias = query.get("keyAlias") || null;
    if (!teamId) {
      return NextResponse.json(
        {
          error:
            "Either ?tenant=<name> or ?teamId=<id> (admin) must be provided",
        },
        { status: 400 }
      );
    }
  } else {
    // Path 3: no resolution possible. See doc above for why we don't
    // pick a default.
    return NextResponse.json(
      { error: "Tenant must be specified via ?tenant=<name>" },
      { status: 400 }
    );
  }

  // Month param: YYYY-MM, defaults to current month. Rejected up front
  // so a malformed value is a 400 instead of an uncaught RangeError.
  const range = resolveMonthRange(query.get("month"));
  if (!range) {
    return NextResponse.json(
      { error: "Invalid month, expected YYYY-MM" },
      { status: 400 }
    );
  }

  try {
    // Per-tenant budget lives on the virtual key, not the team
    // (Feature 7 fix). When the request is scoped to a specific
    // tenant (keyAlias provided), look up the key so we can return
    // the per-tenant cap. Tolerate failure — older LiteLLM builds
    // or short-lived race conditions during provisioning shouldn't
    // 500 the whole usage page; we degrade to "no key info".
    //
    // The team and key lookups are independent, so run them together.
    const [teamInfo, keyInfo] = await Promise.all([
      getTeamInfo(teamId),
      keyAlias ? findKeyByAlias(teamId, keyAlias).catch(() => null) : null,
    ]);

    // Page through results — server-side filtered by key_alias when
    // provided. Pagination still needed because LiteLLM caps
    // page_size at 100, and a busy tenant can easily exceed that in
    // a month. With server-side filtering this stays cheap regardless
    // of how busy sibling tenants in the same team are.
    //
    // Row shape is owned by lib/litellm, so it stays loosely typed here.
    const allRequests: any[] = [];
    let truncated = true;
    for (let page = 1; page <= MAX_SPEND_LOG_PAGES; page++) {
      const result = await getTeamSpendLogsV2(
        teamId,
        range.startStr,
        range.endStr,
        page,
        SPEND_LOG_PAGE_SIZE,
        keyAlias
      );
      allRequests.push(...(result.data || []));
      if (page >= (result.total_pages || 1)) {
        truncated = false;
        break;
      }
    }
    if (truncated) {
      // Hitting the cap means the totals below are an undercount; make
      // that visible in the logs rather than failing silently.
      console.warn(
        `Usage for team ${teamId} (${range.month}) truncated at ${MAX_SPEND_LOG_PAGES} pages`
      );
    }

    const { totals, dailyUsage } = summarizeUsage(allRequests);
    const team = teamInfo?.team_info;

    return NextResponse.json({
      teamId,
      keyAlias, // null when admin queries team-wide (no specific tenant)
      month: range.month,
      currentPeriod: {
        inputTokens: totals.inputTokens,
        outputTokens: totals.outputTokens,
        totalSpend: totals.spend,
        requestCount: allRequests.length,
      },
      // Budget reporting (Feature 7).
      //
      // When the caller scopes to a specific tenant (keyAlias set),
      // we report THAT tenant's per-key budget — that's what the
      // tenant detail page renders, and what the customer expects
      // when they see "Budget" on a tenant's page.
      //
      // When unscoped (admin / org-wide view), or when the key lookup
      // failed, we fall back to the team budget — that's the org-wide
      // cap, conceptually different but the only thing meaningful at
      // that scope.
      //
      // The two cases display the same way; the editor button gates
      // on whether we know which tenant we're on (= keyAlias set).
      budget: keyInfo
        ? {
            maxBudget: keyInfo.maxBudget,
            spend: keyInfo.spend,
            remaining: remainingBudget(keyInfo.maxBudget, keyInfo.spend),
            budgetDuration: keyInfo.budgetDuration,
          }
        : {
            maxBudget: team?.max_budget ?? null,
            spend: team?.spend ?? 0,
            remaining: remainingBudget(team?.max_budget, team?.spend),
            budgetDuration: team?.budget_duration ?? null,
          },
      rateLimits: {
        rpm: team?.rpm_limit ?? null,
        tpm: team?.tpm_limit ?? null,
      },
      dailyUsage,
    });
  } catch (e: any) {
    // Non-Error throws have no `.message`; log the raw value instead.
    console.error("Usage fetch error:", e instanceof Error ? e.message : e);
    return NextResponse.json(
      { error: safeError(e, "Failed to fetch usage") },
      { status: 500 }
    );
  }
}