Compare commits

..

2 Commits

Author SHA1 Message Date
647afcfbe7 Suspendedremoval display in Frontend
All checks were successful
Build and Push / build (push) Successful in 1m31s
2026-05-01 21:48:25 +02:00
b12bca8818 Suspendedremoval display in Frontend
All checks were successful
Build and Push / build (push) Successful in 1m28s
2026-05-01 21:39:16 +02:00
3 changed files with 128 additions and 15 deletions

View File

@@ -2,7 +2,10 @@ import { getSessionUser, canMutate } from "@/lib/session";
import { getTranslations, getFormatter } from "next-intl/server";
import { redirect } from "next/navigation";
import { listTenants } from "@/lib/k8s";
import { listActiveTenantRequestsByOrgId } from "@/lib/db";
import {
listActiveTenantRequestsByOrgId,
syncProvisioningStatuses,
} from "@/lib/db";
import {
listVisibleTenants,
canSeeInflightRequests,
@@ -160,6 +163,23 @@ export default async function DashboardPage() {
// Pending/in-flight requests are only shown to roles that can act on
// them. `user`-role customers see no request cards.
//
// syncProvisioningStatuses runs on every dashboard load: it walks
// active and provisioning rows and reconciles them against the
// current cluster state. Without this, the operator-initiated
// 60-day TTL deletion (Bug 37b) leaves the portal showing "Your
// assistant is ready!" cards for tenants that no longer exist —
// the operator deletes the CR, but the DB row stays at active=true
// until something updates it. Running the sync at every dashboard
// load keeps the portal eventually consistent with the cluster
// without needing a separate cron/job.
//
// Cost: one K8s GET per row in (active, provisioning) status. At
// pilot scale this is small; if it grows we'd cache or move to a
// periodic background job.
if (canSeeInflightRequests(user)) {
await syncProvisioningStatuses();
}
const orgRequests = canSeeInflightRequests(user)
? await listActiveTenantRequestsByOrgId(user.orgId)
: [];

View File

@@ -63,9 +63,14 @@ const MIGRATION_SQL = `
CREATE INDEX IF NOT EXISTS idx_tenant_requests_status ON tenant_requests(status);
CREATE INDEX IF NOT EXISTS idx_tenant_requests_org_id ON tenant_requests(zitadel_org_id);
CREATE INDEX IF NOT EXISTS idx_tenant_requests_org_status ON tenant_requests(zitadel_org_id, status);
CREATE UNIQUE INDEX IF NOT EXISTS uniq_tenant_requests_tenant_name
ON tenant_requests(tenant_name)
WHERE tenant_name IS NOT NULL;
-- Note: the unique constraint on tenant_name is NOT created here.
-- Pre-Bug-37 we had a non-partial UNIQUE on tenant_name, which is
-- incompatible with resume requests (same tenant_name, different
-- request_type). The new partial unique indexes are created
-- further down in the migration block, after the request_type
-- column has been added and backfilled. This bootstrap section
-- only creates indexes that are safe regardless of request_type
-- semantics.
-- Idempotent column adds for existing databases
ALTER TABLE tenant_requests ADD COLUMN IF NOT EXISTS encrypted_secrets BYTEA;
@@ -639,8 +644,33 @@ export async function deleteTenantRequest(id: string): Promise<void> {
}
/**
* Sync provisioning statuses: for all requests with status "provisioning",
* check if the PiecedTenant CR has reached "Ready" and update to "active".
* Reconcile the portal's tenant_requests table against actual cluster
* state. Three passes, walking only rows with `tenant_name` set:
*
* 1. provisioning → active: when a tenant CR's phase reaches Ready
* or Running, the portal flips the row to active so the
* "provisioning…" card transitions into the running tenant view.
*
* 2. active/provisioning → deleted: when the corresponding CR no
* longer exists in the cluster (404), or is mid-deletion (has
* metadata.deletionTimestamp set), the row gets flipped to
* `deleted`. The DB is otherwise blind to operator-initiated
* deletions — when the 60-day TTL fires (Bug 37b) and the
* operator deletes a suspended tenant, the portal would happily
* keep showing the "Your assistant is ready!" card forever.
* Without this reconciliation the dashboard drifts from reality.
*
* 3. pending resume → cancelled: when a pending resume request's
* tenant is no longer suspended (admin resumed it directly,
* tenant was deleted, or it was never suspended in the first
* place), the request is moot. Flip to 'cancelled' so the
* pending-resume unique index releases for any future genuine
* resume request. We pick `cancelled` over `rejected` because
* the customer didn't do anything wrong — circumstances just
* changed.
*
* Errors are tolerated per-row: a transient API hiccup on one tenant
* shouldn't fail the whole sweep. Skipped rows get retried next call.
*
* Slice 3 note: with multi-tenant per org, this iterates each row
* individually (keyed by its own tenant_name), so multiple in-flight
@@ -648,24 +678,78 @@ export async function deleteTenantRequest(id: string): Promise<void> {
*/
export async function syncProvisioningStatuses(): Promise<void> {
await ensureSchema();
// Active+provisioning rows: status reflects "the tenant should
// exist and be running".
// Pending resume rows: status reflects "the tenant is suspended,
// awaiting reactivation".
// Both need cluster-side validation; we fetch them in one query
// and dispatch on (status, request_type).
const result = await getPool().query<TenantRequest>(
"SELECT * FROM tenant_requests WHERE status = 'provisioning'"
`SELECT * FROM tenant_requests
WHERE tenant_name IS NOT NULL
AND (
status IN ('provisioning', 'active')
OR (status = 'pending' AND request_type = 'resume')
)`
);
for (const row of result.rows) {
const mapped = mapRow(row);
if (!mapped.tenantName) continue;
let tenant: Awaited<ReturnType<typeof getTenant>> = null;
try {
const tenant = await getTenant(mapped.tenantName);
if (
tenant?.status?.phase === "Ready" ||
tenant?.status?.phase === "Running"
) {
await updateTenantRequestStatus(mapped.id, "active");
}
tenant = await getTenant(mapped.tenantName);
} catch {
// Tenant might not exist yet — skip
// Transient API error — skip this row, retry on next sweep.
continue;
}
// Pending resume request: validity hinges on tenant being suspended.
if (
mapped.status === "pending" &&
mapped.requestType === "resume"
) {
// Tenant doesn't exist or is being deleted: cancel the resume
// request (it can never be fulfilled). Don't fall through to
// the "deleted" branch below — that would also flip the
// provision row, which is the right thing for a CR-level
// deletion but we want this resume row specifically resolved
// here.
if (!tenant || tenant.metadata.deletionTimestamp) {
await updateTenantRequestStatus(mapped.id, "cancelled");
continue;
}
// Tenant is no longer suspended: the request is moot.
// Cancel it (the customer didn't do anything wrong; the
// condition the request was about no longer applies).
if (!tenant.spec.suspend) {
await updateTenantRequestStatus(mapped.id, "cancelled");
continue;
}
// Tenant still suspended, request still relevant. Leave as-is.
continue;
}
// Active or provisioning row: CR gone, or mid-deletion. Flip the
// row to 'deleted'. `markTenantRequestDeletedByTenantName` flips
// every row with this tenant_name (provision + any resume rows),
// which is the right thing for a CR-level deletion.
if (!tenant || tenant.metadata.deletionTimestamp) {
await markTenantRequestDeletedByTenantName(mapped.tenantName);
continue;
}
// CR exists and is healthy. Promote provisioning → active when
// the operator reports the tenant has reached steady state.
// Keep `active` rows on `active` regardless of phase — a
// temporarily-Reconfiguring tenant is still active from the
// portal's billing/visibility perspective.
if (
mapped.status === "provisioning" &&
(tenant.status?.phase === "Ready" || tenant.status?.phase === "Running")
) {
await updateTenantRequestStatus(mapped.id, "active");
}
}
}

View File

@@ -134,6 +134,15 @@ export interface PiecedTenant {
name: string;
namespace?: string;
creationTimestamp?: string;
/**
* Set by the API server when something issues a Delete on the CR.
* The CR continues to exist while finalizers run cleanup; once
* they all remove themselves, the API server permanently removes
* the CR. Used by the portal's status sync to detect tenants
* being torn down — the customer should see "Deleted" rather
* than "Ready" while the cleanup runs.
*/
deletionTimestamp?: string;
labels?: Record<string, string>;
annotations?: Record<string, string>;
};