Files
pieced-threema-gateway-public/deploy/helm/pieced-threema-gateway/templates/database-backup-cleanup.yaml

140 lines
4.9 KiB
YAML

{{- if and .Values.postgres.enabled .Values.postgres.backup.enabled .Values.postgres.backup.cleanup.enabled }}
# =============================================================================
# Backup CR cleanup CronJob.
#
# The Cluster has barmanObjectStore.retentionPolicy set, but that only
# prunes the actual backup data (base + WAL) in the MinIO bucket. CNPG
# does NOT delete the Kubernetes `Backup` CRs that ScheduledBackup keeps
# creating, so without this job they accumulate one per day forever and
# bloat the ArgoCD resource tree under the ScheduledBackup parent.
#
# Strategy:
# List all Backup CRs for cluster=pieced-threema-gateway-db, sorted by
# creationTimestamp ascending, set aside the last N entries (the newest
# ones, which are kept), and delete the rest. Keep ~2x the S3 retention
# so we never delete a CR whose data is still on disk.
#
# Same shape as apps/litellm-pg-backup-cleanup.yaml in pieced-gitops.
# =============================================================================
# Identity used by the backup-cleanup CronJob's pod (referenced there via
# serviceAccountName). Permissions are attached by the Role/RoleBinding of
# the same name.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: pieced-threema-gateway-db-backup-cleanup
  # Quoted so a numeric- or boolean-looking namespace still renders as a string.
  namespace: {{ .Values.namespace | quote }}
---
# Namespaced permissions for the cleanup job: it may only get, list and
# delete CNPG Backup CRs (API group postgresql.cnpg.io) in this namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: pieced-threema-gateway-db-backup-cleanup
  # Quoted so a numeric- or boolean-looking namespace still renders as a string.
  namespace: {{ .Values.namespace | quote }}
rules:
  - apiGroups: ["postgresql.cnpg.io"]
    resources: ["backups"]
    verbs: ["get", "list", "delete"]
---
# Grants the cleanup Role to the cleanup ServiceAccount; all three objects
# share one name and live in the same namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: pieced-threema-gateway-db-backup-cleanup
  # Quoted so a numeric- or boolean-looking namespace still renders as a string.
  namespace: {{ .Values.namespace | quote }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: pieced-threema-gateway-db-backup-cleanup
subjects:
  - kind: ServiceAccount
    name: pieced-threema-gateway-db-backup-cleanup
    namespace: {{ .Values.namespace | quote }}
---
# CronJob that prunes old CNPG Backup CRs for the gateway database cluster.
#
# On each run the container lists the Backup CRs labelled
# cnpg.io/cluster=$CLUSTER in $NAMESPACE, oldest first, keeps the newest
# $KEEP of them and deletes the rest. Schedule, image and KEEP come from
# .Values.postgres.backup.cleanup; the cluster name is fixed.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: pieced-threema-gateway-db-backup-cleanup
  # Quoted so a numeric- or boolean-looking namespace still renders as a string.
  namespace: {{ .Values.namespace | quote }}
  labels:
    app.kubernetes.io/name: pieced-threema-gateway-db-backup-cleanup
    app.kubernetes.io/part-of: pieced-platform
spec:
  schedule: {{ .Values.postgres.backup.cleanup.schedule | quote }}
  # Never run two cleanups at once; a run that cannot start within 10 minutes
  # of its scheduled time is skipped.
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 1
  startingDeadlineSeconds: 600
  jobTemplate:
    spec:
      # Auto-clean the Job object 1h after completion so it doesn't
      # also pile up in ArgoCD's tree.
      ttlSecondsAfterFinished: 3600
      backoffLimit: 1
      template:
        metadata:
          labels:
            app.kubernetes.io/name: pieced-threema-gateway-db-backup-cleanup
        spec:
          serviceAccountName: pieced-threema-gateway-db-backup-cleanup
          restartPolicy: OnFailure
          securityContext:
            runAsNonRoot: true
            runAsUser: 1001
            runAsGroup: 1001
            seccompProfile:
              type: RuntimeDefault
          containers:
            - name: cleanup
              image: {{ .Values.postgres.backup.cleanup.image | quote }}
              imagePullPolicy: IfNotPresent
              securityContext:
                allowPrivilegeEscalation: false
                readOnlyRootFilesystem: true
                capabilities:
                  drop: ["ALL"]
              resources:
                requests:
                  cpu: 10m
                  memory: 32Mi
                limits:
                  cpu: 100m
                  memory: 128Mi
              env:
                - name: NAMESPACE
                  value: {{ .Values.namespace | quote }}
                - name: CLUSTER
                  value: pieced-threema-gateway-db
                - name: KEEP
                  value: {{ .Values.postgres.backup.cleanup.keep | quote }}
              command:
                - /bin/bash
                - -c
                - |
                  set -euo pipefail
                  # KEEP is used in bash arithmetic below, where an empty
                  # value silently evaluates to 0 and would prune every
                  # Backup CR. Insist on a non-negative integer up front.
                  if ! [[ "${KEEP}" =~ ^[0-9]+$ ]]; then
                    echo "KEEP must be a non-negative integer, got '${KEEP}'" >&2
                    exit 1
                  fi
                  # Force base 10 so a value such as 08 is not read as octal.
                  keep=$(( 10#${KEEP} ))
                  echo "Listing Backup CRs for cluster=${CLUSTER} in ns=${NAMESPACE}"
                  # Capture the listing with command substitution rather than
                  # process substitution: set -e then aborts the job when
                  # kubectl fails, instead of treating the failure as an
                  # empty list and reporting success.
                  listing="$(
                    kubectl -n "${NAMESPACE}" get backups.postgresql.cnpg.io \
                      -l "cnpg.io/cluster=${CLUSTER}" \
                      --sort-by=.metadata.creationTimestamp \
                      -o name
                  )"
                  all=()
                  if [[ -n "${listing}" ]]; then
                    mapfile -t all < <(printf '%s\n' "${listing}")
                  fi
                  total=${#all[@]}
                  echo "Found ${total} backup CR(s); keeping newest ${keep}"
                  if (( total <= keep )); then
                    echo "Nothing to prune."
                    exit 0
                  fi
                  # The list is oldest-first, so the entries to delete are
                  # the leading (total - keep) names.
                  prune_count=$(( total - keep ))
                  to_delete=("${all[@]:0:${prune_count}}")
                  echo "Deleting ${prune_count} old backup CR(s):"
                  printf ' %s\n' "${to_delete[@]}"
                  # Delete in chunks to keep the kubectl command line sane
                  # even if the historical backlog is in the hundreds.
                  printf '%s\n' "${to_delete[@]}" \
                    | xargs -r -n 50 kubectl -n "${NAMESPACE}" delete --ignore-not-found
                  echo "Done."
{{- end }}