{{- if and .Values.postgres.enabled .Values.postgres.backup.enabled .Values.postgres.backup.cleanup.enabled }}
# =============================================================================
# Backup CR cleanup CronJob.
#
# The Cluster has barmanObjectStore.retentionPolicy set, but that only
# prunes the actual backup data (base + WAL) in the MinIO bucket. CNPG
# does NOT delete the Kubernetes `Backup` CRs that ScheduledBackup keeps
# creating, so without this job they accumulate one per day forever and
# bloat the ArgoCD resource tree under the ScheduledBackup parent.
#
# Strategy:
#   List all Backup CRs for cluster=pieced-threema-gateway-db, sort by
#   creationTimestamp ascending, drop the last N entries (newest),
#   delete the rest. Keep ~2x the S3 retention so we never delete a CR
#   whose data is still on disk.
#
# Failure handling:
#   The job fails (non-zero exit) when the Backup listing cannot be
#   fetched or when KEEP is not a non-negative integer, instead of
#   reporting "Nothing to prune." and exiting successfully.
#
# Same shape as apps/litellm-pg-backup-cleanup.yaml in pieced-gitops.
# =============================================================================
apiVersion: v1
kind: ServiceAccount
metadata:
  name: pieced-threema-gateway-db-backup-cleanup
  namespace: {{ .Values.namespace | quote }}
---
# Namespaced permissions for the cleanup script: it only lists and deletes
# CNPG Backup objects.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: pieced-threema-gateway-db-backup-cleanup
  namespace: {{ .Values.namespace | quote }}
rules:
  - apiGroups: ["postgresql.cnpg.io"]
    resources: ["backups"]
    verbs: ["get", "list", "delete"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: pieced-threema-gateway-db-backup-cleanup
  namespace: {{ .Values.namespace | quote }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: pieced-threema-gateway-db-backup-cleanup
subjects:
  - kind: ServiceAccount
    name: pieced-threema-gateway-db-backup-cleanup
    namespace: {{ .Values.namespace | quote }}
---
apiVersion: batch/v1
kind: CronJob
metadata:
  name: pieced-threema-gateway-db-backup-cleanup
  namespace: {{ .Values.namespace | quote }}
  labels:
    app.kubernetes.io/name: pieced-threema-gateway-db-backup-cleanup
    app.kubernetes.io/part-of: pieced-platform
spec:
  schedule: {{ .Values.postgres.backup.cleanup.schedule | quote }}
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 1
  startingDeadlineSeconds: 600
  jobTemplate:
    spec:
      # Auto-clean the Job object 1h after completion so it doesn't
      # also pile up in ArgoCD's tree.
      ttlSecondsAfterFinished: 3600
      backoffLimit: 1
      template:
        metadata:
          labels:
            app.kubernetes.io/name: pieced-threema-gateway-db-backup-cleanup
        spec:
          serviceAccountName: pieced-threema-gateway-db-backup-cleanup
          restartPolicy: OnFailure
          securityContext:
            runAsNonRoot: true
            runAsUser: 1001
            runAsGroup: 1001
            seccompProfile:
              type: RuntimeDefault
          containers:
            - name: cleanup
              image: {{ .Values.postgres.backup.cleanup.image | quote }}
              imagePullPolicy: IfNotPresent
              securityContext:
                allowPrivilegeEscalation: false
                readOnlyRootFilesystem: true
                capabilities:
                  drop: ["ALL"]
              resources:
                requests:
                  cpu: 10m
                  memory: 32Mi
                limits:
                  cpu: 100m
                  memory: 128Mi
              env:
                - name: NAMESPACE
                  value: {{ .Values.namespace | quote }}
                - name: CLUSTER
                  value: pieced-threema-gateway-db
                # Number of newest Backup CRs to retain.
                - name: KEEP
                  value: {{ .Values.postgres.backup.cleanup.keep | quote }}
              command:
                - /bin/bash
                - -c
                - |
                  set -euo pipefail

                  # KEEP feeds bash arithmetic below; reject anything that
                  # is not a plain non-negative integer so a bad value
                  # cannot widen the prune (a negative KEEP would select
                  # every CR for deletion).
                  case "${KEEP}" in
                    ''|*[!0-9]*)
                      echo "KEEP must be a non-negative integer, got '${KEEP}'" >&2
                      exit 1
                      ;;
                  esac
                  # Force base 10 so a zero-padded value is not read as octal.
                  keep=$(( 10#${KEEP} ))

                  echo "Listing Backup CRs for cluster=${CLUSTER} in ns=${NAMESPACE}"

                  # Capture the listing with command substitution so a
                  # failing kubectl aborts the script via `set -e`. Feeding
                  # kubectl directly into mapfile through <( ) would discard
                  # its exit status, and a failed list would be reported as
                  # "Nothing to prune.".
                  listing="$(
                    kubectl -n "${NAMESPACE}" get backups.postgresql.cnpg.io \
                      -l "cnpg.io/cluster=${CLUSTER}" \
                      --sort-by=.metadata.creationTimestamp \
                      -o name
                  )"

                  # Oldest first; an empty listing must yield an empty array,
                  # not a single empty element.
                  all=()
                  if [[ -n "${listing}" ]]; then
                    mapfile -t all < <(printf '%s\n' "${listing}")
                  fi

                  total=${#all[@]}
                  echo "Found ${total} backup CR(s); keeping newest ${keep}"

                  if (( total <= keep )); then
                    echo "Nothing to prune."
                    exit 0
                  fi

                  # Everything before the newest `keep` entries is deleted.
                  prune_count=$(( total - keep ))
                  to_delete=("${all[@]:0:${prune_count}}")

                  echo "Deleting ${prune_count} old backup CR(s):"
                  printf '  %s\n' "${to_delete[@]}"

                  # Delete in chunks to keep the kubectl command line sane
                  # even if the historical backlog is in the hundreds.
                  printf '%s\n' "${to_delete[@]}" \
                    | xargs -r -n 50 kubectl -n "${NAMESPACE}" delete --ignore-not-found

                  echo "Done."
{{- end }}