Files
pieced-portal/src/app/api/tenants/[name]/suspend/route.ts
admin ce70fe8480
Some checks failed
Build and Push / build (push) Failing after 38s
Phase1: Schema + skill event tracking
2026-05-23 23:45:04 +02:00

170 lines
5.7 KiB
TypeScript

import { NextRequest, NextResponse } from "next/server";
import { z } from "zod";
import { getSessionUser, canMutate } from "@/lib/session";
import { getTenant, patchTenantSpec, setTenantAnnotation } from "@/lib/k8s";
import { canUserSeeTenant } from "@/lib/visibility";
import { recordSuspensionEvent } from "@/lib/db";
import { safeError } from "@/lib/errors";
/** Shape of the PATCH request body: an object with a boolean `suspend` flag. */
const patchSchema = z.object({ suspend: z.boolean() });
/**
 * PATCH /api/tenants/[name]/suspend
 *
 * Direct suspend control on the PiecedTenant CR. Sets `spec.suspend`
 * to true (cancel) or false (resume).
 *
 * Authorization (Bug 37a)
 * -----------------------
 * - suspend=true  → owners and platform admins may call.
 * - suspend=false → platform admins ONLY. Owners must go through the
 *   resume-request flow (POST /api/tenants/[name]/resume-request),
 *   which creates a pending request for admin approval. This
 *   asymmetry is by design: cancellation is self-service (low risk;
 *   reversible by request); reactivation requires admin oversight
 *   (e.g. to re-validate billing, confirm intent).
 *
 * Customer flow:
 * - Cancel: PATCH suspend=true here.
 * - Resume: POST /resume-request — creates a 'resume' tenant_request;
 *   an admin approves via /api/admin/requests/[id]/approve, which
 *   then PATCHes suspend=false here as a platform user.
 *
 * Workload behaviour
 * ------------------
 * On suspend=true the operator deletes the OpenClawInstance, stopping
 * the pod within seconds. Tenant data — namespace, ConfigMaps,
 * OpenBao secrets, CNPG database, LiteLLM team — is retained.
 *
 * Suspended tenants enter a 60-day retention window (operator
 * constant `retentionAfterSuspend`); after that, the tenant is fully
 * deleted unless a pending resume request exists. The operator checks
 * the `pieced.ch/resume-request-pending` annotation to know about
 * pending requests. This handler never sets that annotation; it only
 * clears it (best-effort) after a successful resume. Setting it is
 * presumably done by the resume-request flow — not visible from this
 * file.
 *
 * Responses
 * ---------
 * - 401 when there is no session user.
 * - 403 when the user may not mutate, or a non-platform user asks to
 *   resume.
 * - 404 when the tenant does not exist or is not visible to the user
 *   (same response for both, so existence is not leaked).
 * - 400 when the body is not `{ suspend: boolean }`.
 * - 200 on success, and also when the tenant is already in the
 *   requested state ("No change.").
 * - On a failed spec patch: the error's own `statusCode` if it is a
 *   valid 4xx/5xx integer, otherwise 500.
 *
 * @param req    Incoming request; its JSON body must match `patchSchema`.
 * @param params Route params promise resolving to the tenant `name`.
 */
export async function PATCH(
  req: NextRequest,
  { params }: { params: Promise<{ name: string }> }
) {
  const user = await getSessionUser();
  if (!user) {
    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
  }
  if (!canMutate(user)) {
    return NextResponse.json({ error: "Forbidden" }, { status: 403 });
  }

  const { name } = await params;
  const tenant = await getTenant(name);
  if (!tenant) {
    return NextResponse.json({ error: "Not found" }, { status: 404 });
  }
  // Identical pattern to the detail page — don't leak existence.
  if (!(await canUserSeeTenant(user, tenant))) {
    return NextResponse.json({ error: "Not found" }, { status: 404 });
  }

  // A body that is not valid JSON becomes `null`, which then fails
  // schema validation and yields the same 400 as a malformed object.
  const body = await req.json().catch(() => null);
  const parsed = patchSchema.safeParse(body);
  if (!parsed.success) {
    return NextResponse.json(
      { error: "Invalid input", details: parsed.error.flatten() },
      { status: 400 }
    );
  }
  const { suspend } = parsed.data;

  // Bug 37a: resume (suspend=false) is platform-admin only via this
  // endpoint. Owners must go through the resume-request flow.
  if (!suspend && !user.isPlatform) {
    return NextResponse.json(
      {
        error:
          "Resume requires platform-admin approval. Submit a resume request via /api/tenants/[name]/resume-request.",
      },
      { status: 403 }
    );
  }

  // No-op early exit. Avoids a needless K8s patch + status churn when
  // the user double-clicks the button or the UI is briefly out of sync.
  if (Boolean(tenant.spec.suspend) === suspend) {
    return NextResponse.json(
      { message: "No change.", suspend },
      { status: 200 }
    );
  }

  try {
    await patchTenantSpec(name, { suspend });

    // Billing — Phase 1: record the transition so monthly proration
    // can exclude suspended days from the fixed fee. The portal
    // commands this transition; the operator's status.suspendedAt
    // lags by a reconcile cycle (seconds), which is irrelevant for
    // monthly billing. Best-effort: a logging failure never blocks
    // the suspend/resume itself.
    try {
      const orgId =
        tenant.metadata.labels?.["pieced.ch/zitadel-org-id"] ?? null;
      if (orgId) {
        await recordSuspensionEvent(
          name,
          orgId,
          suspend ? "suspended" : "resumed"
        );
      } else {
        console.warn(
          `billing: tenant ${name} has no zitadel-org-id label; suspension event not recorded`
        );
      }
    } catch (billingErr) {
      console.error(
        `billing: failed to record suspension event for ${name}:`,
        billingErr
      );
    }

    // On admin-side resume, also clear the pending-resume-request
    // annotation if it exists. Belt-and-suspenders: the admin-approve
    // endpoint already clears it on its happy path, but a platform
    // user resuming directly via this endpoint shouldn't leave the
    // annotation behind. Best-effort: failure to clear the annotation
    // is logged but doesn't fail the resume.
    if (!suspend) {
      try {
        await setTenantAnnotation(
          name,
          "pieced.ch/resume-request-pending",
          null
        );
      } catch (annotationErr) {
        console.warn(
          "failed to clear resume-request-pending annotation; operator will see it stale until next request transition",
          annotationErr
        );
      }
    }

    return NextResponse.json(
      {
        message: suspend
          ? "Subscription cancelled. Your data is preserved for 60 days."
          : "Subscription resumed.",
        suspend,
      },
      { status: 200 }
    );
  } catch (e: unknown) {
    console.error("Suspend toggle failed:", e);
    return NextResponse.json(
      { error: safeError(e, "Failed to update subscription") },
      // Never forward an arbitrary upstream value as the HTTP status:
      // an out-of-range code would make building the response throw.
      { status: resolveFailureStatus(e) }
    );
  }
}

/**
 * Picks the HTTP status for a failed suspend toggle.
 *
 * Returns the thrown value's `statusCode` only when it is an integer in
 * the 4xx/5xx range; anything else (missing, non-numeric, out of range,
 * or a thrown non-object such as `undefined`) maps to 500.
 */
function resolveFailureStatus(e: unknown): number {
  if (typeof e !== "object" || e === null) {
    return 500;
  }
  const code = (e as { statusCode?: unknown }).statusCode;
  const isErrorStatus =
    typeof code === "number" &&
    Number.isInteger(code) &&
    code >= 400 &&
    code <= 599;
  return isErrorStatus ? code : 500;
}