Suspendedremoval
Some checks failed
Build and Push / build (push) Failing after 48s

This commit is contained in:
2026-05-01 18:07:00 +02:00
parent 7d58c78cb9
commit a5812dca9a
16 changed files with 880 additions and 90 deletions

View File

@@ -174,7 +174,16 @@ export default async function DashboardPage() {
(t) => t.metadata.labels?.["pieced.ch/zitadel-org-id"] === user.orgId
);
const inflightRequests = orgRequests.filter(
(r) => !r.tenantName || !orgScopedTenants.some((t) => t.metadata.name === r.tenantName)
(r) =>
// Only show provision (initial creation) requests on the
// dashboard. Resume requests (Bug 37a) belong with their
// specific tenant — the SubscriptionToggle on the tenant
// detail page renders the pending state there. Showing them
// on the dashboard too would duplicate the surface and
// confuse customers about which tenant they refer to.
r.requestType !== "resume" &&
(!r.tenantName ||
!orgScopedTenants.some((t) => t.metadata.name === r.tenantName))
);
// Slice 5: only owners (and platform users, who'd typically be using

View File

@@ -3,6 +3,7 @@ import { getTranslations, getFormatter } from "next-intl/server";
import { redirect, notFound } from "next/navigation";
import { getTenant } from "@/lib/k8s";
import { canUserSeeTenant } from "@/lib/visibility";
import { getPendingResumeRequestForTenant } from "@/lib/db";
import { StatusBadge } from "@/components/ui/status-badge";
import { WarningBadge } from "@/components/ui/warning-badge";
import { UsageDisplay } from "@/components/dashboard/usage-display";
@@ -47,6 +48,13 @@ export default async function TenantDetailPage({
// The current state comes from spec.suspend on the CR.
const isSuspended = Boolean(tenant.spec.suspend);
// Bug 37a: when the tenant is suspended, an owner can request
// reactivation (admin-gated). Look up whether one is in flight so
// the SubscriptionToggle can render the right state.
const pendingResumeRequest = isSuspended
? await getPendingResumeRequestForTenant(name)
: null;
// Bug 7: assigned-users panel is meaningless for personal tenants
// (sole-owner by definition; the only "assignee" is the owner
// themselves). We hide the panel when EITHER the CR carries the
@@ -208,7 +216,19 @@ export default async function TenantDetailPage({
? t("subscriptionDescriptionSuspended")
: t("subscriptionDescriptionActive")}
</p>
<SubscriptionToggle tenantName={name} suspended={isSuspended} />
<SubscriptionToggle
tenantName={name}
suspended={isSuspended}
isPlatform={user.isPlatform}
pendingResumeRequest={
pendingResumeRequest
? {
id: pendingResumeRequest.id,
createdAt: pendingResumeRequest.createdAt,
}
: null
}
/>
</section>
)}
</div>

View File

@@ -5,7 +5,7 @@ import {
updateTenantRequestStatus,
clearEncryptedSecrets,
} from "@/lib/db";
import { createTenant } from "@/lib/k8s";
import { createTenant, patchTenantSpec, setTenantAnnotation } from "@/lib/k8s";
import { sendApprovalEmail } from "@/lib/email";
import { decryptSecrets } from "@/lib/crypto";
import { writePackageSecrets } from "@/lib/openbao";
@@ -19,14 +19,26 @@ import { safeError } from "@/lib/errors";
/**
* POST /api/admin/requests/[id]/approve
* Approve a tenant request:
* 1. Decrypt stored package secrets (if any)
* 2. Write each package's secrets to OpenBao at secret/data/tenants/{tenant-name}/{package}
* 3. Null the encrypted_secrets column
* 4. Build workspace files (SOUL.md, AGENTS.md, TOOLS.md)
* 5. Create PiecedTenant CR
* 6. Update request status, notify customer.
* Also supports re-approving a previously rejected request (clears admin notes).
*
* Approve a request. Two paths depending on request_type:
*
* Provision (the original purpose):
* 1. Decrypt stored package secrets (if any)
* 2. Write each package's secrets to OpenBao
* 3. Null the encrypted_secrets column
* 4. Build workspace files (SOUL.md, AGENTS.md, TOOLS.md)
* 5. Create PiecedTenant CR
* 6. Update request status, notify customer.
* Supports re-approving a previously rejected request (clears admin notes).
*
* Resume (Bug 37a):
* 1. PATCH spec.suspend=false on the existing PiecedTenant CR.
* 2. Clear the `pieced.ch/resume-request-pending` annotation so the
* operator knows the request is settled (and doesn't pause its
* 60-day TTL forever — though now that the tenant isn't suspended,
* the timer is moot).
* 3. Mark request approved, notify customer.
* No CR creation, no secret materialisation, no workspace files.
*/
export async function POST(
request: Request,
@@ -60,6 +72,56 @@ export async function POST(
);
}
// Resume request: short path. Just patch the existing tenant, clear
// the annotation, mark approved.
if (tenantRequest.requestType === "resume") {
if (!tenantRequest.tenantName) {
// Shouldn't happen — resume requests are created with tenant_name
// set. Defensive 500 if it does.
return NextResponse.json(
{ error: "Resume request has no tenant_name" },
{ status: 500 }
);
}
try {
await patchTenantSpec(tenantRequest.tenantName, { suspend: false });
// Clear the annotation that pauses the operator's 60-day TTL.
// Best-effort — annotation cleanup is also done by the operator
// when it sees suspend=false on the next reconcile (it clears
// status.suspendedAt), but explicitly clearing here keeps the
// CR clean.
try {
await setTenantAnnotation(
tenantRequest.tenantName,
"pieced.ch/resume-request-pending",
null
);
} catch (e) {
console.warn(
"post-approve annotation clear failed; not blocking",
e
);
}
await updateTenantRequestStatus(id, "approved", adminNotes);
await sendApprovalEmail(tenantRequest, tenantRequest.tenantName).catch(
(e) => console.error("approval email failed:", e)
);
return NextResponse.json({
message: "Resume approved. Tenant is reactivating.",
tenantName: tenantRequest.tenantName,
});
} catch (e: any) {
console.error("Resume approval failed:", e);
return NextResponse.json(
{ error: safeError(e, "Failed to approve resume") },
{ status: 500 }
);
}
}
const isReApproval = tenantRequest.status === "rejected";
// Build the CR name: see `lib/tenant-naming.ts` for the format spec.

View File

@@ -1,11 +1,19 @@
import { NextResponse } from "next/server";
import { requirePlatformRole } from "@/lib/session";
import { getTenantRequestById, updateTenantRequestStatus } from "@/lib/db";
import { setTenantAnnotation } from "@/lib/k8s";
import { sendRejectionEmail } from "@/lib/email";
/**
* POST /api/admin/requests/[id]/reject
* Reject a tenant request and notify the customer.
*
* For resume requests (Bug 37a): also clears the
* `pieced.ch/resume-request-pending` annotation on the tenant CR.
* The operator's 60-day TTL then resumes counting from the original
* suspendedAt — rejection doesn't reset it. The customer can submit
* a fresh resume request later if circumstances change, but that
* starts a new pending row and re-stamps the annotation.
*/
export async function POST(
request: Request,
@@ -37,6 +45,26 @@ export async function POST(
adminNotes,
});
// Resume rejection: clear the annotation so the operator's TTL
// resumes. Best-effort — failure is logged, not propagated.
if (
tenantRequest.requestType === "resume" &&
tenantRequest.tenantName
) {
try {
await setTenantAnnotation(
tenantRequest.tenantName,
"pieced.ch/resume-request-pending",
null
);
} catch (e) {
console.warn(
"post-reject annotation clear failed; operator's TTL will pause until annotation removed by admin",
e
);
}
}
// Notify customer
await sendRejectionEmail(
tenantRequest.contactEmail,

View File

@@ -6,6 +6,7 @@ import {
updateTenantRequestEditableFields,
} from "@/lib/db";
import { encryptSecrets } from "@/lib/crypto";
import { setTenantAnnotation } from "@/lib/k8s";
import { onboardingSchema } from "@/lib/validation";
import { safeError } from "@/lib/errors";
@@ -91,6 +92,25 @@ export async function DELETE(
try {
await updateTenantRequestStatus(id, "cancelled");
// Customer cancels their own pending resume request: clear the
// operator-side annotation so the 60-day TTL resumes counting.
// Best-effort — the operator handles missing annotation gracefully.
if (tr.requestType === "resume" && tr.tenantName) {
try {
await setTenantAnnotation(
tr.tenantName,
"pieced.ch/resume-request-pending",
null
);
} catch (e) {
console.warn(
"post-cancel annotation clear failed; not blocking",
e
);
}
}
return NextResponse.json({ message: "Request cancelled.", id });
} catch (e: any) {
console.error("Failed to cancel request:", e);

View File

@@ -0,0 +1,154 @@
import { NextRequest, NextResponse } from "next/server";
import { getSessionUser, canMutate } from "@/lib/session";
import { getTenant, setTenantAnnotation } from "@/lib/k8s";
import { canUserSeeTenant } from "@/lib/visibility";
import {
createResumeRequest,
getPendingResumeRequestForTenant,
getTenantRequestByTenantName,
} from "@/lib/db";
import { safeError } from "@/lib/errors";
/**
* POST /api/tenants/[name]/resume-request
*
* Owner-initiated request to reactivate a suspended tenant (Bug 37a).
* Creates a pending tenant_request of type 'resume' for admin review,
* and stamps the PiecedTenant CR with an annotation that pauses the
* operator's 60-day deletion timer.
*
* Why a request flow at all
* -------------------------
* Customers can self-serve cancel; resume requires admin oversight.
* Reactivation may involve re-validating billing, confirming the
* customer still wants to be active, or other manual steps. The
* request flow gives admins a queue to review, with the same approve/
* reject UX as initial provision requests.
*
* Authorization
* -------------
* Owners and platform admins. Platform admins shouldn't normally use
* this endpoint — they have direct PATCH suspend access — but it's
* permissive in case admin tooling pivots.
*
* Validation
* ----------
* - Tenant must exist and be visible to the caller.
* - Tenant must be currently suspended. Resuming an active tenant
* is meaningless.
* - At most one pending resume request per tenant. Enforced by the
* DB's partial unique index, but we also check explicitly here to
* return a friendly 409 instead of a 500.
*
* Side effects on success
* -----------------------
* - INSERT into tenant_requests (request_type='resume', status='pending')
* - PATCH annotation `pieced.ch/resume-request-pending=<request-id>` on
* the CR. This is the operator's signal to pause its 60-day deletion
* timer until the request transitions to terminal.
*
* The annotation set is best-effort: if the K8s PATCH fails after the
* DB insert, the row exists without the annotation. The customer
* sees the request as pending; admin can still approve. The only
* functional consequence is the 60-day timer doesn't pause until the
* next request transition, which is fine in practice (admin response
* times are dramatically shorter than 60 days).
*/
export async function POST(
req: NextRequest,
{ params }: { params: Promise<{ name: string }> }
) {
const user = await getSessionUser();
if (!user) {
return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
}
if (!canMutate(user)) {
return NextResponse.json({ error: "Forbidden" }, { status: 403 });
}
const { name } = await params;
const tenant = await getTenant(name);
if (!tenant) {
return NextResponse.json({ error: "Not found" }, { status: 404 });
}
if (!(await canUserSeeTenant(user, tenant))) {
return NextResponse.json({ error: "Not found" }, { status: 404 });
}
if (!tenant.spec.suspend) {
return NextResponse.json(
{ error: "Tenant is not suspended; nothing to resume." },
{ status: 409 }
);
}
// Already a pending request? Don't duplicate.
const existing = await getPendingResumeRequestForTenant(name);
if (existing) {
return NextResponse.json(
{
error: "A resume request for this tenant is already pending.",
request: { id: existing.id, createdAt: existing.createdAt },
},
{ status: 409 }
);
}
// Pull traceability fields (companyName, agentName) from the original
// provision request. The schema marks these NOT NULL, so we have to
// populate them; copying from the provision row keeps the resume
// row navigable in the admin UI without making up values.
const provision = await getTenantRequestByTenantName(name);
try {
const resumeRequest = await createResumeRequest({
tenantName: name,
zitadelOrgId: tenant.metadata.labels?.[
"pieced.ch/zitadel-org-id"
] ?? user.zitadelOrgId,
zitadelUserId: user.id,
contactName: user.name ?? user.email ?? "Unknown",
contactEmail: user.email ?? "unknown@example.invalid",
companyName: provision?.companyName ?? tenant.spec.displayName ?? name,
agentName: provision?.agentName ?? "Assistant",
});
// Stamp the annotation so the operator pauses its TTL. If this
// fails the request still exists; surface the error so admin
// tooling can re-stamp if needed, but don't roll back.
try {
await setTenantAnnotation(
name,
"pieced.ch/resume-request-pending",
resumeRequest.id
);
} catch (e) {
console.warn(
"resume request created but annotation could not be set; operator's 60-day timer will not pause until next reconcile triggered by request transition",
e
);
}
return NextResponse.json(
{
message: "Resume request submitted. An admin will review shortly.",
request: { id: resumeRequest.id, status: resumeRequest.status },
},
{ status: 201 }
);
} catch (e: any) {
// Unique violation (a pending row already exists for this tenant)
// is friendly-handled above; this catches everything else.
if (e.code === "23505") {
return NextResponse.json(
{ error: "A resume request for this tenant is already pending." },
{ status: 409 }
);
}
console.error("Resume request creation failed:", e);
return NextResponse.json(
{ error: safeError(e, "Failed to submit resume request") },
{ status: 500 }
);
}
}

View File

@@ -1,7 +1,7 @@
import { NextRequest, NextResponse } from "next/server";
import { z } from "zod";
import { getSessionUser, canMutate } from "@/lib/session";
import { getTenant, patchTenantSpec } from "@/lib/k8s";
import { getTenant, patchTenantSpec, setTenantAnnotation } from "@/lib/k8s";
import { canUserSeeTenant } from "@/lib/visibility";
import { safeError } from "@/lib/errors";
@@ -12,37 +12,38 @@ const patchSchema = z.object({
/**
* PATCH /api/tenants/[name]/suspend
*
* Customer-side "Cancel subscription" / "Resume" toggle (Bug 31).
* Direct suspend control on the PiecedTenant CR. Sets `spec.suspend`
* to true (cancel) or false (resume).
*
* Sets `spec.suspend` on the PiecedTenant CR. The operator interprets
* this flag as "stop reconciling this tenant" — workloads, packages,
* and channel-user changes are no longer applied. Existing data is
* preserved (namespace, ConfigMaps, OpenBao secrets, CNPG database,
* billing records). Resuming sets the flag back to false and the
* operator picks up reconciliation on the next loop.
* Authorization (Bug 37a)
* -----------------------
* - suspend=true → owners and platform admins may call.
* - suspend=false → platform admins ONLY. Owners must go through the
* resume-request flow (POST /api/tenants/[name]/resume-request),
* which creates a pending request for admin approval. This
* asymmetry is by design: cancellation is self-service (low risk;
* reversible by request); reactivation requires admin oversight
* (e.g. to re-validate billing, confirm intent).
*
* Authorization
* -------------
* - Customer-side: only an `owner` of the tenant's org may call this.
* `canMutate` is the right gate (mirrors the rest of the customer
* API surface). User-role members cannot cancel a subscription.
* - Platform staff: allowed via `canMutate`'s isPlatform branch, but
* in practice they should use admin tooling for this — the action
* is exposed here for the customer's benefit.
* Customer flow:
* - Cancel: PATCH suspend=true here
* - Resume: POST /resume-request — creates a 'resume' tenant_request,
* admin approves via /api/admin/requests/[id]/approve which
* then PATCHes suspend=false here as a platform user.
*
* Visibility check is via `canUserSeeTenant` — same notFound() trick
* as the detail page, so we don't leak existence of tenants the
* caller can't see.
* Workload behaviour
* ------------------
* On suspend=true the operator deletes the OpenClawInstance, stopping
* the pod within seconds. Tenant data — namespace, ConfigMaps,
* OpenBao secrets, CNPG database, LiteLLM team — is retained.
*
* Note on workload teardown
* -------------------------
* As of this writing, the operator's `suspend` handling is "skip
* reconciliation and set status.phase to Suspended". The underlying
* StatefulSet keeps running until next reconciliation, which won't
* happen while suspended. Group D will add scale-to-zero so cancelled
* subscriptions actually stop incurring compute. Until then, an
* operator following up with a `kubectl scale` is the workaround.
* Customer data is preserved either way.
* Suspended tenants enter a 60-day retention window (operator
* constant `retentionAfterSuspend`); after that, the tenant is fully
* deleted unless a pending resume request exists. The operator
* checks the `pieced.ch/resume-request-pending` annotation to know
* about pending requests; we set it here when admin approves the
* resume (transitively, via the admin-approve endpoint), and clear
* it when the request reaches a terminal state.
*/
export async function PATCH(
req: NextRequest,
@@ -76,6 +77,18 @@ export async function PATCH(
}
const { suspend } = parsed.data;
// Bug 37a: resume (suspend=false) is platform-admin only via this
// endpoint. Owners must go through the resume-request flow.
if (!suspend && !user.isPlatform) {
return NextResponse.json(
{
error:
"Resume requires platform-admin approval. Submit a resume request via /api/tenants/[name]/resume-request.",
},
{ status: 403 }
);
}
// No-op early exit. Avoids a needless K8s patch + status churn when
// the user double-clicks the button or the UI is briefly out of sync.
if (Boolean(tenant.spec.suspend) === suspend) {
@@ -87,10 +100,32 @@ export async function PATCH(
try {
await patchTenantSpec(name, { suspend });
// On admin-side resume, also clear the pending-resume-request
// annotation if it exists. Belt-and-suspenders: the admin-approve
// endpoint already clears it on its happy path, but a platform
// user resuming directly via this endpoint shouldn't leave the
// annotation behind. Best-effort: failure to clear the annotation
// is logged but doesn't fail the resume.
if (!suspend) {
try {
await setTenantAnnotation(
name,
"pieced.ch/resume-request-pending",
null
);
} catch (e) {
console.warn(
"failed to clear resume-request-pending annotation; operator will see it stale until next request transition",
e
);
}
}
return NextResponse.json(
{
message: suspend
? "Subscription cancelled. Your data is preserved."
? "Subscription cancelled. Your data is preserved for 60 days."
: "Subscription resumed.",
suspend,
},