Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 647afcfbe7 | |||
| b12bca8818 |
@@ -2,7 +2,10 @@ import { getSessionUser, canMutate } from "@/lib/session";
|
|||||||
import { getTranslations, getFormatter } from "next-intl/server";
|
import { getTranslations, getFormatter } from "next-intl/server";
|
||||||
import { redirect } from "next/navigation";
|
import { redirect } from "next/navigation";
|
||||||
import { listTenants } from "@/lib/k8s";
|
import { listTenants } from "@/lib/k8s";
|
||||||
import { listActiveTenantRequestsByOrgId } from "@/lib/db";
|
import {
|
||||||
|
listActiveTenantRequestsByOrgId,
|
||||||
|
syncProvisioningStatuses,
|
||||||
|
} from "@/lib/db";
|
||||||
import {
|
import {
|
||||||
listVisibleTenants,
|
listVisibleTenants,
|
||||||
canSeeInflightRequests,
|
canSeeInflightRequests,
|
||||||
@@ -160,6 +163,23 @@ export default async function DashboardPage() {
|
|||||||
|
|
||||||
// Pending/in-flight requests are only shown to roles that can act on
|
// Pending/in-flight requests are only shown to roles that can act on
|
||||||
// them. `user`-role customers see no request cards.
|
// them. `user`-role customers see no request cards.
|
||||||
|
//
|
||||||
|
// syncProvisioningStatuses runs on every dashboard load: it walks
|
||||||
|
// active, provisioning and pending-resume rows and reconciles them against the
|
||||||
|
// current cluster state. Without this, the operator-initiated
|
||||||
|
// 60-day TTL deletion (Bug 37b) leaves the portal showing "Your
|
||||||
|
// assistant is ready!" cards for tenants that no longer exist —
|
||||||
|
// the operator deletes the CR, but the DB row stays at active=true
|
||||||
|
// until something updates it. Running the sync at every dashboard
|
||||||
|
// load keeps the portal eventually consistent with the cluster
|
||||||
|
// without needing a separate cron/job.
|
||||||
|
//
|
||||||
|
// Cost: one K8s GET per active, provisioning or pending-resume row. At
|
||||||
|
// pilot scale this is small; if it grows we'd cache or move to a
|
||||||
|
// periodic background job.
|
||||||
|
if (canSeeInflightRequests(user)) {
|
||||||
|
await syncProvisioningStatuses();
|
||||||
|
}
|
||||||
const orgRequests = canSeeInflightRequests(user)
|
const orgRequests = canSeeInflightRequests(user)
|
||||||
? await listActiveTenantRequestsByOrgId(user.orgId)
|
? await listActiveTenantRequestsByOrgId(user.orgId)
|
||||||
: [];
|
: [];
|
||||||
|
|||||||
108
src/lib/db.ts
108
src/lib/db.ts
@@ -63,9 +63,14 @@ const MIGRATION_SQL = `
|
|||||||
CREATE INDEX IF NOT EXISTS idx_tenant_requests_status ON tenant_requests(status);
|
CREATE INDEX IF NOT EXISTS idx_tenant_requests_status ON tenant_requests(status);
|
||||||
CREATE INDEX IF NOT EXISTS idx_tenant_requests_org_id ON tenant_requests(zitadel_org_id);
|
CREATE INDEX IF NOT EXISTS idx_tenant_requests_org_id ON tenant_requests(zitadel_org_id);
|
||||||
CREATE INDEX IF NOT EXISTS idx_tenant_requests_org_status ON tenant_requests(zitadel_org_id, status);
|
CREATE INDEX IF NOT EXISTS idx_tenant_requests_org_status ON tenant_requests(zitadel_org_id, status);
|
||||||
CREATE UNIQUE INDEX IF NOT EXISTS uniq_tenant_requests_tenant_name
|
-- Note: the unique constraint on tenant_name is NOT created here.
|
||||||
ON tenant_requests(tenant_name)
|
-- Pre-Bug-37 we had a non-partial UNIQUE on tenant_name, which is
|
||||||
WHERE tenant_name IS NOT NULL;
|
-- incompatible with resume requests (same tenant_name, different
|
||||||
|
-- request_type). The new partial unique indexes are created
|
||||||
|
-- further down in the migration block, after the request_type
|
||||||
|
-- column has been added and backfilled. This bootstrap section
|
||||||
|
-- only creates indexes that are safe regardless of request_type
|
||||||
|
-- semantics.
|
||||||
|
|
||||||
-- Idempotent column adds for existing databases
|
-- Idempotent column adds for existing databases
|
||||||
ALTER TABLE tenant_requests ADD COLUMN IF NOT EXISTS encrypted_secrets BYTEA;
|
ALTER TABLE tenant_requests ADD COLUMN IF NOT EXISTS encrypted_secrets BYTEA;
|
||||||
@@ -639,8 +644,33 @@ export async function deleteTenantRequest(id: string): Promise<void> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sync provisioning statuses: for all requests with status "provisioning",
|
* Reconcile the portal's tenant_requests table against actual cluster
|
||||||
* check if the PiecedTenant CR has reached "Ready" and update to "active".
|
* state. One pass over rows with `tenant_name` set, applying three rules:
|
||||||
|
*
|
||||||
|
* 1. provisioning → active: when a tenant CR's phase reaches Ready
|
||||||
|
* or Running, the portal flips the row to active so the
|
||||||
|
* "provisioning…" card transitions into the running tenant view.
|
||||||
|
*
|
||||||
|
* 2. active/provisioning → deleted: when the corresponding CR no
|
||||||
|
* longer exists in the cluster (404), or is mid-deletion (has
|
||||||
|
* metadata.deletionTimestamp set), the row gets flipped to
|
||||||
|
* `deleted`. The DB is otherwise blind to operator-initiated
|
||||||
|
* deletions — when the 60-day TTL fires (Bug 37b) and the
|
||||||
|
* operator deletes a suspended tenant, the portal would happily
|
||||||
|
* keep showing the "Your assistant is ready!" card forever.
|
||||||
|
* Without this reconciliation the dashboard drifts from reality.
|
||||||
|
*
|
||||||
|
* 3. pending resume → cancelled: when a pending resume request's
|
||||||
|
* tenant is no longer suspended (admin resumed it directly,
|
||||||
|
* tenant was deleted, or it was never suspended in the first
|
||||||
|
* place), the request is moot. Flip to 'cancelled' so the
|
||||||
|
* pending-resume unique index releases for any future genuine
|
||||||
|
* resume request. We pick `cancelled` over `rejected` because
|
||||||
|
* the customer didn't do anything wrong — circumstances just
|
||||||
|
* changed.
|
||||||
|
*
|
||||||
|
* Cluster lookup errors are tolerated per-row (DB write errors still throw): a transient API hiccup on one tenant
|
||||||
|
* shouldn't fail the whole sweep. Skipped rows get retried next call.
|
||||||
*
|
*
|
||||||
* Slice 3 note: with multi-tenant per org, this iterates each row
|
* Slice 3 note: with multi-tenant per org, this iterates each row
|
||||||
* individually (keyed by its own tenant_name), so multiple in-flight
|
* individually (keyed by its own tenant_name), so multiple in-flight
|
||||||
@@ -648,25 +678,79 @@ export async function deleteTenantRequest(id: string): Promise<void> {
|
|||||||
*/
|
*/
|
||||||
export async function syncProvisioningStatuses(): Promise<void> {
|
export async function syncProvisioningStatuses(): Promise<void> {
|
||||||
await ensureSchema();
|
await ensureSchema();
|
||||||
|
// Active+provisioning rows: status reflects "the tenant should
|
||||||
|
// exist and be running".
|
||||||
|
// Pending resume rows: status reflects "the tenant is suspended,
|
||||||
|
// awaiting reactivation".
|
||||||
|
// Both need cluster-side validation; we fetch them in one query
|
||||||
|
// and dispatch on (status, request_type).
|
||||||
const result = await getPool().query<TenantRequest>(
|
const result = await getPool().query<TenantRequest>(
|
||||||
"SELECT * FROM tenant_requests WHERE status = 'provisioning'"
|
`SELECT * FROM tenant_requests
|
||||||
|
WHERE tenant_name IS NOT NULL
|
||||||
|
AND (
|
||||||
|
status IN ('provisioning', 'active')
|
||||||
|
OR (status = 'pending' AND request_type = 'resume')
|
||||||
|
)`
|
||||||
);
|
);
|
||||||
|
|
||||||
for (const row of result.rows) {
|
for (const row of result.rows) {
|
||||||
const mapped = mapRow(row);
|
const mapped = mapRow(row);
|
||||||
if (!mapped.tenantName) continue;
|
if (!mapped.tenantName) continue;
|
||||||
|
|
||||||
|
let tenant: Awaited<ReturnType<typeof getTenant>> = null;
|
||||||
try {
|
try {
|
||||||
const tenant = await getTenant(mapped.tenantName);
|
tenant = await getTenant(mapped.tenantName);
|
||||||
|
} catch {
|
||||||
|
// Transient API error — skip this row, retry on next sweep.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pending resume request: validity hinges on tenant being suspended.
|
||||||
if (
|
if (
|
||||||
tenant?.status?.phase === "Ready" ||
|
mapped.status === "pending" &&
|
||||||
tenant?.status?.phase === "Running"
|
mapped.requestType === "resume"
|
||||||
|
) {
|
||||||
|
// Tenant doesn't exist or is being deleted: cancel the resume
|
||||||
|
// request (it can never be fulfilled). Don't fall through to
|
||||||
|
// the "deleted" branch below — that would also flip the
|
||||||
|
// provision row, which is the right thing for a CR-level
|
||||||
|
// deletion but we want this resume row specifically resolved
|
||||||
|
// here.
|
||||||
|
if (!tenant || tenant.metadata.deletionTimestamp) {
|
||||||
|
await updateTenantRequestStatus(mapped.id, "cancelled");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Tenant is no longer suspended: the request is moot.
|
||||||
|
// Cancel it (the customer didn't do anything wrong; the
|
||||||
|
// condition the request was about no longer applies).
|
||||||
|
if (!tenant.spec.suspend) {
|
||||||
|
await updateTenantRequestStatus(mapped.id, "cancelled");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// Tenant still suspended, request still relevant. Leave as-is.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Active or provisioning row: CR gone, or mid-deletion. Flip the
|
||||||
|
// row to 'deleted'. `markTenantRequestDeletedByTenantName` flips
|
||||||
|
// every row with this tenant_name (provision + any resume rows),
|
||||||
|
// which is the right thing for a CR-level deletion.
|
||||||
|
if (!tenant || tenant.metadata.deletionTimestamp) {
|
||||||
|
await markTenantRequestDeletedByTenantName(mapped.tenantName);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// CR exists and is healthy. Promote provisioning → active when
|
||||||
|
// the operator reports the tenant has reached steady state.
|
||||||
|
// Keep `active` rows on `active` regardless of phase — a
|
||||||
|
// temporarily-Reconfiguring tenant is still active from the
|
||||||
|
// portal's billing/visibility perspective.
|
||||||
|
if (
|
||||||
|
mapped.status === "provisioning" &&
|
||||||
|
(tenant.status?.phase === "Ready" || tenant.status?.phase === "Running")
|
||||||
) {
|
) {
|
||||||
await updateTenantRequestStatus(mapped.id, "active");
|
await updateTenantRequestStatus(mapped.id, "active");
|
||||||
}
|
}
|
||||||
} catch {
|
|
||||||
// Tenant might not exist yet — skip
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -134,6 +134,15 @@ export interface PiecedTenant {
|
|||||||
name: string;
|
name: string;
|
||||||
namespace?: string;
|
namespace?: string;
|
||||||
creationTimestamp?: string;
|
creationTimestamp?: string;
|
||||||
|
/**
|
||||||
|
* Set by the API server when something issues a Delete on the CR.
|
||||||
|
* The CR continues to exist while finalizers run cleanup; once
|
||||||
|
* they all remove themselves, the API server permanently removes
|
||||||
|
* the CR. Used by the portal's status sync to detect tenants
|
||||||
|
* being torn down — the customer should see "Deleted" rather
|
||||||
|
* than "Ready" while the cleanup runs.
|
||||||
|
*/
|
||||||
|
deletionTimestamp?: string;
|
||||||
labels?: Record<string, string>;
|
labels?: Record<string, string>;
|
||||||
annotations?: Record<string, string>;
|
annotations?: Record<string, string>;
|
||||||
};
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user