Compare commits
4 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 342f5728f4 | |||
| 834bed88e0 | |||
| 819e90c16c | |||
| b5abc5958f |
@@ -2,5 +2,5 @@ apiVersion: v2
|
||||
name: pieced-threema-gateway
|
||||
description: PieCed IT central Threema Gateway relay
|
||||
type: application
|
||||
version: 0.1.4
|
||||
appVersion: "0.1.4"
|
||||
version: 0.1.8
|
||||
appVersion: "0.1.8"
|
||||
|
||||
@@ -0,0 +1,139 @@
|
||||
{{- if and .Values.postgres.enabled .Values.postgres.backup.enabled .Values.postgres.backup.cleanup.enabled }}
|
||||
# =============================================================================
|
||||
# Backup CR cleanup CronJob.
|
||||
#
|
||||
# The Cluster has barmanObjectStore.retentionPolicy set, but that only
|
||||
# prunes the actual backup data (base + WAL) in the MinIO bucket. CNPG
|
||||
# does NOT delete the Kubernetes `Backup` CRs that ScheduledBackup keeps
|
||||
# creating, so without this job they accumulate one per day forever and
|
||||
# bloat the ArgoCD resource tree under the ScheduledBackup parent.
|
||||
#
|
||||
# Strategy:
|
||||
# List all Backup CRs for cluster=pieced-threema-gateway-db, sort by
|
||||
#   creationTimestamp ascending, drop the last N entries (newest),
#   delete the rest. N is a count of CRs, not an age: at one backup per
#   day, keeping ~2x the S3 retention (in days) means we never delete a
#   CR whose data is still in the bucket.
|
||||
#
|
||||
# Same shape as apps/litellm-pg-backup-cleanup.yaml in pieced-gitops.
|
||||
# =============================================================================
|
||||
# Identity the cleanup CronJob pod runs as (referenced via
# serviceAccountName in the CronJob's pod spec).
apiVersion: v1
kind: ServiceAccount
metadata:
  name: pieced-threema-gateway-db-backup-cleanup
  # Quoted like the NAMESPACE env value in the CronJob, so an empty or
  # number-looking value cannot be re-typed by the YAML parser.
  namespace: {{ .Values.namespace | quote }}
|
||||
---
|
||||
# Namespaced permissions for the cleanup job: it only needs to read and
# delete CNPG Backup CRs (the script runs `kubectl get` and
# `kubectl delete` on backups.postgresql.cnpg.io).
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: pieced-threema-gateway-db-backup-cleanup
  namespace: {{ .Values.namespace | quote }}
rules:
  - apiGroups: ["postgresql.cnpg.io"]
    resources: ["backups"]
    verbs: ["get", "list", "delete"]
|
||||
---
|
||||
# Grants the Role of the same name to the cleanup ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: pieced-threema-gateway-db-backup-cleanup
  namespace: {{ .Values.namespace | quote }}
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: pieced-threema-gateway-db-backup-cleanup
subjects:
  - kind: ServiceAccount
    name: pieced-threema-gateway-db-backup-cleanup
    namespace: {{ .Values.namespace | quote }}
|
||||
---
|
||||
# Periodically prunes old CNPG Backup CRs, keeping only the newest KEEP
# entries for the cluster named in CLUSTER.
apiVersion: batch/v1
kind: CronJob
metadata:
  name: pieced-threema-gateway-db-backup-cleanup
  namespace: {{ .Values.namespace | quote }}
  labels:
    app.kubernetes.io/name: pieced-threema-gateway-db-backup-cleanup
    app.kubernetes.io/part-of: pieced-platform
spec:
  schedule: {{ .Values.postgres.backup.cleanup.schedule | quote }}
  # Never run two prune passes at once.
  concurrencyPolicy: Forbid
  successfulJobsHistoryLimit: 1
  failedJobsHistoryLimit: 1
  startingDeadlineSeconds: 600
  jobTemplate:
    spec:
      # Auto-clean the Job object 1h after completion so it doesn't
      # also pile up in ArgoCD's tree.
      ttlSecondsAfterFinished: 3600
      backoffLimit: 1
      template:
        metadata:
          labels:
            app.kubernetes.io/name: pieced-threema-gateway-db-backup-cleanup
        spec:
          serviceAccountName: pieced-threema-gateway-db-backup-cleanup
          restartPolicy: OnFailure
          securityContext:
            runAsNonRoot: true
            runAsUser: 1001
            runAsGroup: 1001
            seccompProfile:
              type: RuntimeDefault
          containers:
            - name: cleanup
              image: {{ .Values.postgres.backup.cleanup.image | quote }}
              imagePullPolicy: IfNotPresent
              securityContext:
                allowPrivilegeEscalation: false
                readOnlyRootFilesystem: true
                capabilities:
                  drop: ["ALL"]
              resources:
                requests:
                  cpu: 10m
                  memory: 32Mi
                limits:
                  cpu: 100m
                  memory: 128Mi
              env:
                - name: NAMESPACE
                  value: {{ .Values.namespace | quote }}
                - name: CLUSTER
                  value: pieced-threema-gateway-db
                - name: KEEP
                  value: {{ .Values.postgres.backup.cleanup.keep | quote }}
              command:
                - /bin/bash
                - -c
                - |
                  set -euo pipefail

                  # KEEP feeds the arithmetic below. Reject anything that is
                  # not a plain non-negative integer rather than letting bash
                  # evaluate it, and force base 10 so "08" is not read as octal.
                  if ! [[ "${KEEP}" =~ ^[0-9]+$ ]]; then
                    echo "KEEP must be a non-negative integer, got '${KEEP}'" >&2
                    exit 1
                  fi
                  keep=$(( 10#${KEEP} ))

                  echo "Listing Backup CRs for cluster=${CLUSTER} in ns=${NAMESPACE}"
                  # Use command substitution, not `mapfile < <(kubectl ...)`:
                  # the exit status of a process substitution is discarded, so
                  # a failed kubectl (API down, RBAC denied) would look like
                  # "0 backups" and the job would exit 0. With an assignment,
                  # `set -e` aborts the job on failure.
                  listing="$(
                    kubectl -n "${NAMESPACE}" get backups.postgresql.cnpg.io \
                      -l "cnpg.io/cluster=${CLUSTER}" \
                      --sort-by=.metadata.creationTimestamp \
                      -o name
                  )"

                  # Oldest first. Guard the empty case so an empty listing
                  # yields zero elements rather than one empty element.
                  all=()
                  if [[ -n "${listing}" ]]; then
                    mapfile -t all < <(printf '%s\n' "${listing}")
                  fi

                  total=${#all[@]}
                  echo "Found ${total} backup CR(s); keeping newest ${keep}"

                  if (( total <= keep )); then
                    echo "Nothing to prune."
                    exit 0
                  fi

                  # The list is sorted ascending, so the first
                  # (total - keep) entries are the ones to drop.
                  prune_count=$(( total - keep ))
                  to_delete=("${all[@]:0:prune_count}")

                  echo "Deleting ${prune_count} old backup CR(s):"
                  printf '  %s\n' "${to_delete[@]}"

                  # Delete in chunks to keep the kubectl command line sane
                  # even if the historical backlog is in the hundreds.
                  printf '%s\n' "${to_delete[@]}" \
                    | xargs -r -n 50 kubectl -n "${NAMESPACE}" delete --ignore-not-found

                  echo "Done."
|
||||
{{- end }}
|
||||
@@ -0,0 +1,59 @@
|
||||
{{- if and .Values.postgres.enabled .Values.postgres.backup.enabled }}
|
||||
# =============================================================================
|
||||
# S3 credentials for the CNPG Cluster's barmanObjectStore.
|
||||
#
|
||||
# Projects the in-cluster MinIO root credentials out of OpenBao
|
||||
# (.Values.postgres.backup.s3.credentialsPath) into a Secret in this
|
||||
# namespace. Referenced by spec.backup.barmanObjectStore.s3Credentials
|
||||
# on the Cluster CR (see templates/database.yaml).
|
||||
#
|
||||
# Same shape and convention as the chart's other ExternalSecrets
|
||||
# (templates/externalsecret.yaml) — KV v2 path without /data/ segment.
|
||||
# =============================================================================
|
||||
# Materializes the Secret "cnpg-s3-credentials" with the two keys the
# Cluster's barmanObjectStore.s3Credentials refers to.
apiVersion: external-secrets.io/v1
kind: ExternalSecret
metadata:
  name: cnpg-s3-credentials
  namespace: {{ .Values.namespace | quote }}
spec:
  refreshInterval: 1h
  secretStoreRef:
    name: openbao-backend
    kind: ClusterSecretStore
  target:
    name: cnpg-s3-credentials
    creationPolicy: Owner
  data:
    # Templated path/property values are quoted so an empty or
    # special-character value renders as a string instead of null or a
    # YAML parse error.
    - secretKey: ACCESS_KEY_ID
      remoteRef:
        key: {{ .Values.postgres.backup.s3.credentialsPath | quote }}
        property: {{ .Values.postgres.backup.s3.accessKeyProperty | quote }}
    - secretKey: ACCESS_SECRET_KEY
      remoteRef:
        key: {{ .Values.postgres.backup.s3.credentialsPath | quote }}
        property: {{ .Values.postgres.backup.s3.secretKeyProperty | quote }}
|
||||
---
|
||||
# =============================================================================
|
||||
# Daily backup of the pieced-threema-gateway-db CNPG cluster.
|
||||
#
|
||||
# IMPORTANT — cron format:
|
||||
# CNPG ScheduledBackup uses a SIX-field Go-style cron expression
|
||||
# (sec min hour dom mon dow), NOT the 5-field Unix crontab format. The
|
||||
# CNPG controller silently accepts 5-field expressions but reinterprets
|
||||
# them — see https://github.com/cloudnative-pg/cloudnative-pg/issues/5380
|
||||
# Default schedule (.Values.postgres.backup.schedule.cron) is set
|
||||
# accordingly.
|
||||
# =============================================================================
|
||||
apiVersion: postgresql.cnpg.io/v1
kind: ScheduledBackup
metadata:
  name: pieced-threema-gateway-db-daily
  namespace: {{ .Values.namespace | quote }}
spec:
  # Six-field cron expression (seconds first); kept quoted so the
  # asterisks are not parsed as YAML aliases.
  schedule: {{ .Values.postgres.backup.schedule.cron | quote }}
  # Each generated Backup CR is owned by this ScheduledBackup.
  backupOwnerReference: self
  cluster:
    name: pieced-threema-gateway-db
  method: barmanObjectStore
  # Deliberately unquoted: the API expects a boolean here, not a string.
  immediate: {{ .Values.postgres.backup.schedule.immediate }}
|
||||
{{- end }}
|
||||
@@ -17,4 +17,23 @@ spec:
|
||||
{{- toYaml .Values.postgres.resources | nindent 4 }}
|
||||
monitoring:
|
||||
enablePodMonitor: true
|
||||
{{- if .Values.postgres.backup.enabled }}
|
||||
backup:
|
||||
barmanObjectStore:
|
||||
destinationPath: s3://{{ .Values.postgres.backup.s3.bucket }}/pieced-threema-gateway-db/
|
||||
endpointURL: {{ .Values.postgres.backup.s3.endpointURL | quote }}
|
||||
s3Credentials:
|
||||
accessKeyId:
|
||||
name: cnpg-s3-credentials
|
||||
key: ACCESS_KEY_ID
|
||||
secretAccessKey:
|
||||
name: cnpg-s3-credentials
|
||||
key: ACCESS_SECRET_KEY
|
||||
wal:
|
||||
compression: {{ .Values.postgres.backup.wal.compression }}
|
||||
maxParallel: {{ .Values.postgres.backup.wal.maxParallel }}
|
||||
data:
|
||||
compression: {{ .Values.postgres.backup.data.compression }}
|
||||
retentionPolicy: {{ .Values.postgres.backup.retentionPolicy | quote }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
@@ -51,7 +51,17 @@ spec:
|
||||
- port: "8080"
|
||||
protocol: TCP
|
||||
egress:
|
||||
# DNS
|
||||
# DNS — with the proxy interceptor on so toFQDNs rules below
|
||||
# actually work.
|
||||
#
|
||||
# Cilium `toFQDNs` matches against a per-pod identity that is
|
||||
# populated only when the Cilium DNS proxy observes a resolution
|
||||
# for that name. The proxy is enabled per-policy by a `rules.dns`
|
||||
# clause on the DNS egress: without it, DNS resolution still
|
||||
# succeeds (we allow port 53 to kube-system) but Cilium never
|
||||
# learns the resolved IP, so the subsequent TCP connect to
|
||||
# msgapi.threema.ch is denied at egress and the relay logs
|
||||
# "fetch failed" with no further detail.
|
||||
- toEndpoints:
|
||||
- matchLabels:
|
||||
"k8s:io.cilium.k8s.namespace.labels.kubernetes.io/metadata.name": "kube-system"
|
||||
@@ -61,6 +71,9 @@ spec:
|
||||
protocol: UDP
|
||||
- port: "53"
|
||||
protocol: TCP
|
||||
rules:
|
||||
dns:
|
||||
- matchPattern: "*"
|
||||
# Threema Gateway public API
|
||||
- toFQDNs:
|
||||
- matchName: "msgapi.threema.ch"
|
||||
@@ -83,12 +96,20 @@ spec:
|
||||
- ports:
|
||||
- port: "5432"
|
||||
protocol: TCP
|
||||
# Tenant OpenClaw services — port 18789, any tenant namespace
|
||||
# Tenant OpenClaw services — port 18790 (Service targetPort).
|
||||
#
|
||||
# Why 18790, not 18789:
|
||||
# OpenClaw's per-tenant Service exposes the gateway as
|
||||
# `port: 18789, targetPort: 18790`. Cilium's socket-LB rewrites
|
||||
# `connect(svc-IP:18789)` to `pod-IP:18790` before the egress policy
|
||||
# hook fires, so the rule must allow the targetPort (18790), not
|
||||
# the Service port. The application's OPENCLAW_URL_TEMPLATE still
|
||||
# uses :18789 (correct — application connects to the Service port).
|
||||
- toEndpoints:
|
||||
- matchLabels:
|
||||
{{ .Values.networkPolicy.tenantNamespaceLabel | quote }}: {{ .Values.networkPolicy.tenantNamespaceLabelValue | quote }}
|
||||
toPorts:
|
||||
- ports:
|
||||
- port: "18789"
|
||||
- port: "18790"
|
||||
protocol: TCP
|
||||
{{- end }}
|
||||
|
||||
@@ -6,7 +6,7 @@ namespace: threema-gateway
|
||||
|
||||
image:
|
||||
repository: registry.c5ai.ch/pieced/pieced-threema-gateway
|
||||
tag: "0.1.4"
|
||||
tag: "0.1.8"
|
||||
pullPolicy: IfNotPresent
|
||||
|
||||
# Pull from registry.c5ai.ch — matches operator + portal pattern.
|
||||
@@ -39,12 +39,88 @@ postgres:
|
||||
instances: 1
|
||||
storage:
|
||||
size: 5Gi
|
||||
storageClass: longhorn-luks2
|
||||
# Matches portal-db, litellm-pg, zitadel-pg, twenty-pg in pieced-gitops.
|
||||
# The relay's `messages` log row payload is small (no message bodies,
|
||||
# ~80 B per row), so 5Gi covers ~50 M messages — far beyond what a
|
||||
# single tenant's billing window will need.
|
||||
storageClass: longhorn
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
memory: 256Mi
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Daily backup to in-cluster MinIO via barmanObjectStore.
|
||||
#
|
||||
# When enabled, the chart renders:
|
||||
# - spec.backup on the CNPG Cluster (templates/database.yaml)
|
||||
# - ExternalSecret "cnpg-s3-credentials" pulling MinIO root creds
|
||||
# from OpenBao (templates/database-backup.yaml)
|
||||
# - ScheduledBackup "pieced-threema-gateway-db-daily" (templates/database-backup.yaml)
|
||||
# - CronJob "pieced-threema-gateway-db-backup-cleanup" that prunes
|
||||
# old Backup CRs so the ArgoCD resource tree stays tidy
|
||||
# (templates/database-backup-cleanup.yaml)
|
||||
#
|
||||
# Note on Cilium: this chart's CiliumNetworkPolicy only restricts the
|
||||
# relay pod (endpointSelector matches app.kubernetes.io/name=
|
||||
# pieced-threema-gateway). The CNPG postgres pod is NOT covered by
|
||||
# that policy, so its egress to MinIO works freely as long as there
|
||||
# is no namespace-level default-deny CNP in threema-gateway. If you
|
||||
# later add one, you'll also need to allow egress to
|
||||
# minio.minio-pieced.svc:80 from pods labelled cnpg.io/cluster=
|
||||
# pieced-threema-gateway-db.
|
||||
backup:
|
||||
enabled: true
|
||||
|
||||
# Where backups land. The destinationPath is hard-coded to use the
|
||||
# cluster name so per-cluster paths don't collide in the shared
|
||||
# cnpg-backups bucket (matches portal-db, litellm-pg, etc.).
|
||||
s3:
|
||||
bucket: cnpg-backups
|
||||
endpointURL: http://minio.minio-pieced.svc:80
|
||||
# OpenBao path containing MinIO root_user / root_password.
|
||||
# ESO's openbao-backend ClusterSecretStore rewrites KV v2 paths
|
||||
# automatically, so no `/data/` segment is needed (matches the
|
||||
# convention used by the chart's other ExternalSecrets above).
|
||||
credentialsPath: secret/platform/minio-pieced
|
||||
accessKeyProperty: root_user
|
||||
secretKeyProperty: root_password
|
||||
|
||||
wal:
|
||||
compression: gzip
|
||||
maxParallel: 2
|
||||
data:
|
||||
compression: gzip
|
||||
|
||||
# Barman retains backup *data* in S3 for this many days. The Backup
|
||||
# CR cleanup CronJob below independently keeps roughly twice this
|
||||
# many Backup CRs in Kubernetes so we never delete a CR whose data
|
||||
# is still on disk.
|
||||
retentionPolicy: "7d"
|
||||
|
||||
schedule:
|
||||
# CNPG ScheduledBackup uses a SIX-field Go-style cron expression
|
||||
# (sec min hour dom mon dow), NOT the 5-field Unix crontab format.
|
||||
# See https://github.com/cloudnative-pg/cloudnative-pg/issues/5380
|
||||
# for the silent-misinterpretation footgun.
|
||||
# Slot: 02:45:00 daily — between litellm-pg (02:30) and portal-db
|
||||
# (03:00) so the daily snapshot wave is staggered.
|
||||
cron: "0 45 2 * * *"
|
||||
# Trigger an immediate backup when the ScheduledBackup is created.
|
||||
# Useful on first deploy so the `cnpg_collector_last_available_backup_timestamp`
|
||||
# metric ticks immediately and PieCedCNPGBackupFailed clears.
|
||||
immediate: true
|
||||
|
||||
# Backup CR cleanup CronJob. Same shape as
|
||||
# apps/litellm-pg-backup-cleanup.yaml in pieced-gitops.
|
||||
cleanup:
|
||||
enabled: true
|
||||
# Daily at 04:45 — runs ~2h after the ScheduledBackup so the day's
|
||||
# new CR exists and is preserved in the "newest N" window.
|
||||
schedule: "45 4 * * *"
|
||||
keep: 14
|
||||
image: bitnami/kubectl:1.31.6
|
||||
|
||||
# Secrets sourced from OpenBao via External Secrets Operator.
|
||||
# Paths use the same convention as apps/portal/external-secrets.yaml:
|
||||
# full key path starting with the KV v2 mount name (`secret/`), no
|
||||
|
||||
Reference in New Issue
Block a user