Suspended removal
Some checks failed
Build and Push / build (push) Failing after 48s

This commit is contained in:
2026-05-01 18:07:00 +02:00
parent 7d58c78cb9
commit a5812dca9a
16 changed files with 880 additions and 90 deletions

View File

@@ -1,7 +1,7 @@
import { NextRequest, NextResponse } from "next/server";
import { z } from "zod";
import { getSessionUser, canMutate } from "@/lib/session";
import { getTenant, patchTenantSpec } from "@/lib/k8s";
import { getTenant, patchTenantSpec, setTenantAnnotation } from "@/lib/k8s";
import { canUserSeeTenant } from "@/lib/visibility";
import { safeError } from "@/lib/errors";
@@ -12,37 +12,38 @@ const patchSchema = z.object({
/**
* PATCH /api/tenants/[name]/suspend
*
* Customer-side "Cancel subscription" / "Resume" toggle (Bug 31).
* Direct suspend control on the PiecedTenant CR. Sets `spec.suspend`
* to true (cancel) or false (resume).
*
* Sets `spec.suspend` on the PiecedTenant CR. The operator interprets
* this flag as "stop reconciling this tenant" — workloads, packages,
* and channel-user changes are no longer applied. Existing data is
* preserved (namespace, ConfigMaps, OpenBao secrets, CNPG database,
* billing records). Resuming sets the flag back to false and the
* operator picks up reconciliation on the next loop.
* Authorization (Bug 37a)
* -----------------------
* - suspend=true → owners and platform admins may call.
* - suspend=false → platform admins ONLY. Owners must go through the
* resume-request flow (POST /api/tenants/[name]/resume-request),
* which creates a pending request for admin approval. This
* asymmetry is by design: cancellation is self-service (low risk;
* reversible by request); reactivation requires admin oversight
* (e.g. to re-validate billing, confirm intent).
*
* Authorization
* -------------
* - Customer-side: only an `owner` of the tenant's org may call this.
* `canMutate` is the right gate (mirrors the rest of the customer
* API surface). User-role members cannot cancel a subscription.
* - Platform staff: allowed via `canMutate`'s isPlatform branch, but
* in practice they should use admin tooling for this — the action
* is exposed here for the customer's benefit.
* Customer flow:
* - Cancel: PATCH suspend=true here
* - Resume: POST /resume-request — creates a 'resume' tenant_request,
* admin approves via /api/admin/requests/[id]/approve which
* then PATCHes suspend=false here as a platform user.
*
* Visibility check is via `canUserSeeTenant` — same notFound() trick
* as the detail page, so we don't leak existence of tenants the
* caller can't see.
* Workload behaviour
* ------------------
* On suspend=true the operator deletes the OpenClawInstance, stopping
* the pod within seconds. Tenant data — namespace, ConfigMaps,
* OpenBao secrets, CNPG database, LiteLLM team — is retained.
*
* Note on workload teardown
* -------------------------
* As of this writing, the operator's `suspend` handling is "skip
* reconciliation and set status.phase to Suspended". The underlying
* StatefulSet keeps running until next reconciliation, which won't
* happen while suspended. Group D will add scale-to-zero so cancelled
* subscriptions actually stop incurring compute. Until then, an
* operator following up with a `kubectl scale` is the workaround.
* Customer data is preserved either way.
* Suspended tenants enter a 60-day retention window (operator
* constant `retentionAfterSuspend`); after that, the tenant is fully
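* deleted unless a pending resume request exists. The operator
* checks the `pieced.ch/resume-request-pending` annotation to know
* about pending requests. The annotation is not set by this endpoint
* (presumably it is set when the resume request is created — confirm
* against the resume-request endpoint). It is cleared when the
* request reaches a terminal state: the admin-approve endpoint clears
* it on its happy path, and this endpoint additionally clears it,
* best-effort, after a direct resume (suspend=false).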
*/
export async function PATCH(
req: NextRequest,
@@ -76,6 +77,18 @@ export async function PATCH(
}
const { suspend } = parsed.data;
// Bug 37a: resume (suspend=false) is platform-admin only via this
// endpoint. Owners must go through the resume-request flow.
if (!suspend && !user.isPlatform) {
return NextResponse.json(
{
error:
"Resume requires platform-admin approval. Submit a resume request via /api/tenants/[name]/resume-request.",
},
{ status: 403 }
);
}
// No-op early exit. Avoids a needless K8s patch + status churn when
// the user double-clicks the button or the UI is briefly out of sync.
if (Boolean(tenant.spec.suspend) === suspend) {
@@ -87,10 +100,32 @@ export async function PATCH(
try {
await patchTenantSpec(name, { suspend });
// On admin-side resume, also clear the pending-resume-request
// annotation if it exists. Belt-and-suspenders: the admin-approve
// endpoint already clears it on its happy path, but a platform
// user resuming directly via this endpoint shouldn't leave the
// annotation behind. Best-effort: failure to clear the annotation
// is logged but doesn't fail the resume.
if (!suspend) {
try {
await setTenantAnnotation(
name,
"pieced.ch/resume-request-pending",
null
);
} catch (e) {
console.warn(
"failed to clear resume-request-pending annotation; operator will see it stale until next request transition",
e
);
}
}
return NextResponse.json(
{
message: suspend
? "Subscription cancelled. Your data is preserved."
? "Subscription cancelled. Your data is preserved for 60 days."
: "Subscription resumed.",
suspend,
},