Phase1: Schema + skill event tracking
Some checks failed
Build and Push / build (push) Failing after 38s

This commit is contained in:
2026-05-23 23:45:04 +02:00
parent 55571b1e59
commit ce70fe8480
8 changed files with 1406 additions and 59 deletions

View File

@@ -0,0 +1,70 @@
import { NextResponse } from "next/server";
import { requirePlatformRole } from "@/lib/session";
import { listTenants } from "@/lib/k8s";
import { backfillTenantBillingLifecycle } from "@/lib/db";
import { safeError } from "@/lib/errors";
/**
 * POST /api/admin/billing/backfill
 *
 * One-shot bootstrap: takes every live PiecedTenant CR returned by
 * `listTenants()` and hands a snapshot of each one to
 * `backfillTenantBillingLifecycle`, which mirrors them into the
 * Phase 1 billing tables:
 *   - tenant_billing_lifecycle.created_at ← the CR's creationTimestamp
 *   - tenant_skill_events: one 'enabled' event per entry in
 *     spec.packages, anchored at the CR's creationTimestamp
 *   - tenant_suspension_events: one 'suspended' event when the CR is
 *     currently suspended (anchored at status.suspendedAt)
 *
 * Safe to re-run: the helper only inserts rows for tenants that have
 * no lifecycle row / no events yet, so a second run adds nothing.
 *
 * Authorization: platform role only. The request body is ignored.
 *
 * Response: a message, the number of tenants examined, and whatever
 * the helper returns (insert counts — expect non-zero on the first
 * run and zero afterwards). A failed role check yields 403; any
 * failure during the backfill itself yields 500.
 *
 * Phase 2 will surface this behind an admin UI button.
 */
export async function POST() {
  // Authorization gate: any rejection from the role check maps to 403.
  try {
    await requirePlatformRole();
  } catch {
    return NextResponse.json({ error: "Forbidden" }, { status: 403 });
  }

  try {
    const tenants = await listTenants();

    // Reduce each CR to the fields the billing helper consumes.
    const snapshots = tenants.map((tenant) => {
      const meta = tenant.metadata;
      const createdRaw = meta.creationTimestamp;
      const suspendedRaw = tenant.status?.suspendedAt;

      return {
        name: meta.name,
        // Tenants lacking the org label are a pre-Slice-3 artifact.
        // They are still recorded, under the placeholder org id
        // 'unknown', so they show up in admin reports for manual
        // labelling; per-org billing computation skips rows whose
        // org id is 'unknown'.
        zitadelOrgId: meta.labels?.["pieced.ch/zitadel-org-id"] ?? "unknown",
        // Fall back to "now" when the CR carries no creationTimestamp.
        createdAt: createdRaw ? new Date(createdRaw) : new Date(),
        packages: tenant.spec.packages ?? [],
        suspendedAt: suspendedRaw ? new Date(suspendedRaw) : null,
      };
    });

    const inserted = await backfillTenantBillingLifecycle(snapshots);

    // The helper's result is spread last, matching the original key
    // precedence in the response object.
    const payload = {
      message: "Backfill complete.",
      tenantsExamined: tenants.length,
      ...inserted,
    };
    return NextResponse.json(payload);
  } catch (err: any) {
    console.error("Backfill failed:", err);
    const body = { error: safeError(err, "Backfill failed") };
    return NextResponse.json(body, { status: 500 });
  }
}

View File

@@ -4,6 +4,9 @@ import {
getTenantRequestById,
updateTenantRequestStatus,
clearEncryptedSecrets,
recordTenantCreated,
recordSkillEvents,
recordSuspensionEvent,
} from "@/lib/db";
import { createTenant, patchTenantSpec, setTenantAnnotation } from "@/lib/k8s";
import { sendApprovalEmail, sendResumeApprovalEmail } from "@/lib/email";
@@ -85,6 +88,23 @@ export async function POST(
}
try {
await patchTenantSpec(tenantRequest.tenantName, { suspend: false });
// Billing — Phase 1: record the resume so monthly proration
// counts the suspended segment correctly. Best-effort; if
// logging fails, the approval still succeeds.
try {
await recordSuspensionEvent(
tenantRequest.tenantName,
tenantRequest.zitadelOrgId,
"resumed"
);
} catch (e) {
console.error(
"billing: failed to record resumed suspension event:",
e
);
}
// Clear the annotation that pauses the operator's 60-day TTL.
// Best-effort — annotation cleanup is also done by the operator
// when it sees suspend=false on the next reconcile (it clears
@@ -199,6 +219,35 @@ export async function POST(
}
);
// Billing — Phase 1: record the tenant's creation and initial
// package state. Anchored at "now" rather than the CR's
// creationTimestamp because we don't get the timestamp back from
// createTenant — the few-millisecond skew vs the CR's actual
// creationTimestamp is irrelevant for monthly billing.
//
// Best-effort: tracking failures must never block provisioning.
// The backfill helper can repair any gaps later if needed.
const billingAnchor = new Date();
try {
await recordTenantCreated(
tenantName,
tenantRequest.zitadelOrgId,
billingAnchor
);
await recordSkillEvents(
tenantName,
tenantRequest.zitadelOrgId,
packages,
[],
billingAnchor
);
} catch (e) {
console.error(
"billing: failed to record tenant creation / initial skill events:",
e
);
}
// Step 5: Update request status — clear admin notes on re-approval
const updated = await updateTenantRequestStatus(id, "provisioning", {
adminNotes: isReApproval ? null : adminNotes,

View File

@@ -4,6 +4,7 @@ import { getTenant, deleteTenant } from "@/lib/k8s";
import {
markTenantRequestDeletedByTenantName,
removeAllAssignmentsForTenant,
recordTenantDeleted,
} from "@/lib/db";
import { safeError } from "@/lib/errors";
@@ -49,6 +50,15 @@ export async function POST(
console.error("Failed to clean up tenant assignments:", e)
);
// Billing — Phase 1: stamp deletion timestamp on the lifecycle
// row so the final invoice covering the deletion month can
// prorate correctly. Idempotent at the DB layer; a missing
// lifecycle row (e.g. pre-Phase-1 tenants that haven't been
// backfilled yet) makes this a no-op.
await recordTenantDeleted(name).catch((e) =>
console.error("billing: failed to stamp tenant deletion:", e)
);
return NextResponse.json({
message: "Tenant deletion initiated. The operator will clean up all resources.",
});

View File

@@ -3,6 +3,7 @@ import { getSessionUser, canMutate } from "@/lib/session";
import { canUserSeeTenant } from "@/lib/visibility";
import { getTenant, patchTenantSpec } from "@/lib/k8s";
import { getPackageDef } from "@/lib/packages";
import { recordSkillEvents } from "@/lib/db";
import { safeError } from "@/lib/errors";
const ALLOWED_WORKSPACE_FILES = ["SOUL.md", "AGENTS.md", "TOOLS.md"];
@@ -187,6 +188,50 @@ export async function PATCH(
}
const updated = await patchTenantSpec(name, specPatch);
// Billing — Phase 1: if packages changed, record enable/disable
// events. The diff compares `existing` (the pre-patch packages)
// with the patched CR returned by patchTenantSpec — not with the
// requested patch — so the events match what K8s actually
// committed. Best-effort: a logging failure never poisons the
// PATCH response — drift would be reconciled on the next backfill
// or by the next normal toggle.
//
// Note on races: two concurrent PATCHes could each see the
// same `existing` and both succeed at the K8s layer (last write
// wins for spec.packages, which is replaced wholesale). The
// events from the losing PATCH would then describe a transition
// that no longer reflects reality. Acceptable trade-off for v1
// — the toggle UI sends one request at a time and races would
// only matter for adjacent same-day toggles, which the billing
// computation collapses to a single billable day anyway.
if (specPatch.packages !== undefined) {
try {
const orgId =
existing.metadata.labels?.["pieced.ch/zitadel-org-id"] ?? null;
if (orgId) {
const oldSet = new Set<string>(existing.spec.packages ?? []);
const newSet = new Set<string>(updated.spec.packages ?? []);
const added = [...newSet].filter((x) => !oldSet.has(x));
const removed = [...oldSet].filter((x) => !newSet.has(x));
if (added.length > 0 || removed.length > 0) {
await recordSkillEvents(name, orgId, added, removed);
}
} else {
// A tenant without the org label is a pre-Slice-3 artifact
// — we can't attribute its skill events to any org. Log
// and skip rather than guess.
console.warn(
`billing: tenant ${name} has no zitadel-org-id label; skill events not recorded`
);
}
} catch (e) {
console.error(
`billing: failed to record skill events for ${name}:`,
e
);
}
}
return NextResponse.json(updated);
} catch (e: any) {
return NextResponse.json(

View File

@@ -3,6 +3,7 @@ import { z } from "zod";
import { getSessionUser, canMutate } from "@/lib/session";
import { getTenant, patchTenantSpec, setTenantAnnotation } from "@/lib/k8s";
import { canUserSeeTenant } from "@/lib/visibility";
import { recordSuspensionEvent } from "@/lib/db";
import { safeError } from "@/lib/errors";
const patchSchema = z.object({
@@ -101,6 +102,33 @@ export async function PATCH(
try {
await patchTenantSpec(name, { suspend });
// Billing — Phase 1: record the transition so monthly proration
// can exclude suspended days from the fixed fee. The portal
// commands this transition; the operator's status.suspendedAt
// lags by a reconcile cycle (seconds), which is irrelevant for
// monthly billing. Best-effort: a logging failure never blocks
// the suspend/resume itself.
try {
const orgId =
tenant.metadata.labels?.["pieced.ch/zitadel-org-id"] ?? null;
if (orgId) {
await recordSuspensionEvent(
name,
orgId,
suspend ? "suspended" : "resumed"
);
} else {
console.warn(
`billing: tenant ${name} has no zitadel-org-id label; suspension event not recorded`
);
}
} catch (e) {
console.error(
`billing: failed to record suspension event for ${name}:`,
e
);
}
// On admin-side resume, also clear the pending-resume-request
// annotation if it exists. Belt-and-suspenders: the admin-approve
// endpoint already clears it on its happy path, but a platform