#!/usr/bin/env bash
# ============================================================================
# PieCed IT — Session 7.1: Cilium Network Policy Audit
# ============================================================================
#
# Prerequisites:
#   - kubectl configured for the cluster
#   - Existing pods:
#       tenant-alpha/openclaw-0        (3 containers)
#       tenant-testfirma/openclaw-0    (3 containers)
#       pieced-system/pieced-portal-*  (1 container)
#
# This script deploys temporary netshoot pods (which provide curl, nslookup,
# etc.) into each namespace, runs the tests, then cleans up.
#
# Usage:
#   chmod +x cilium-audit.sh
#   ./cilium-audit.sh
# ============================================================================
|
|
|
|
# Strict mode: abort on unhandled errors, on unset variables, and when any
# stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# ANSI colour sequences, stored un-interpreted; they are expanded at print time.
RED='\033[0;31m'
GRN='\033[0;32m'
YLW='\033[1;33m'
RST='\033[0m'

# Running tallies of test outcomes.
PASS=0 FAIL=0 WARN=0

# Markdown report for this run, named after the local start time.
RESULTS_FILE="cilium-audit-results-$(date +%Y%m%d-%H%M%S).md"
|
|
|
|
#######################################
# Print a highlighted section banner to stdout: a blank line, a rule, the
# title, and a closing rule.
# Globals:   YLW, RST (read)
# Arguments: $1 - banner title, printed verbatim
#######################################
log_header() {
  local title="$1"
  local bar="═══════════════════════════════════════════════════"

  # %b expands the escape sequences stored in the colour variables, while %s
  # prints the title literally so backslashes in it are never interpreted.
  printf '\n'
  printf '%b%s%b\n' "$YLW" "$bar" "$RST"
  printf '%b %s%b\n' "$YLW" "$title" "$RST"
  printf '%b%s%b\n' "$YLW" "$bar" "$RST"
}
|
|
|
|
#######################################
# Record the outcome of one connectivity test on stdout and as a table row in
# the Markdown report.
# Globals:
#   PASS, FAIL   (incremented)
#   RESULTS_FILE (appended to)
#   RED, GRN, RST (read)
# Arguments:
#   $1 - test name
#   $2 - source namespace
#   $3 - target description
#   $4 - expectation: "block" or "allow"
#   $5 - exit code of the probe command (0 = the connection succeeded)
# Returns:
#   0 always; an invalid expectation or exit code is counted as a failed test
#   instead of being silently treated as "allow".
#######################################
log_result() {
  local test_name="$1"
  local from_ns="$2"
  local to_target="$3"
  local expected="$4"
  local actual="$5"

  local route="[$from_ns → $to_target]"
  local row="| $test_name | $from_ns | $to_target | $expected"

  # Translate the probe exit code into what was observed on the wire.
  local observed="invalid"
  if [[ "$actual" =~ ^[0-9]+$ ]]; then
    if ((10#$actual == 0)); then
      observed="allow"
    else
      observed="block"
    fi
  fi

  local verdict detail cell
  case "$expected:$observed" in
    block:block)
      verdict="pass" detail="blocked as expected" cell="blocked"
      ;;
    allow:allow)
      verdict="pass" detail="allowed as expected" cell="allowed"
      ;;
    block:allow)
      verdict="fail" detail="SHOULD BE BLOCKED but succeeded!" cell="**ALLOWED**"
      ;;
    allow:block)
      verdict="fail" detail="SHOULD BE ALLOWED but blocked!" cell="**BLOCKED**"
      ;;
    *)
      # Typo in the expectation or a non-numeric exit code: never a pass.
      verdict="fail" cell="n/a"
      detail="invalid expectation \"$expected\" or exit code \"$actual\""
      ;;
  esac

  # printf with %s keeps test names literal; only the colour variables (%b)
  # have their escape sequences expanded.
  if [[ "$verdict" == "pass" ]]; then
    PASS=$((PASS + 1))
    printf ' %b✓ PASS%b %s %s (%s)\n' "$GRN" "$RST" "$route" "$test_name" "$detail"
    printf '%s | %s | ✅ PASS |\n' "$row" "$cell" >> "$RESULTS_FILE"
  else
    FAIL=$((FAIL + 1))
    printf ' %b✗ FAIL%b %s %s (%s)\n' "$RED" "$RST" "$route" "$test_name" "$detail"
    printf '%s | %s | ❌ FAIL |\n' "$row" "$cell" >> "$RESULTS_FILE"
  fi
}
|
|
|
|
# ----------------------------------------------------------------------------
|
|
# Deploy netshoot pods
|
|
# ----------------------------------------------------------------------------
|
|
#######################################
# Start a fresh netshoot probe pod in a namespace and wait until it is Ready.
# Arguments: $1 - namespace to deploy into
# Returns:   non-zero if the stale pod cannot be removed, the pod cannot be
#            created, or it does not become Ready within 60s
#######################################
deploy_netshoot() {
  local ns="$1"
  local name="netshoot-audit"

  echo " Deploying netshoot in $ns..."

  # Remove any leftover pod from an earlier run and wait for it to be gone.
  # A pod that is Completed or still Terminating would otherwise make
  # `kubectl run` fail with AlreadyExists and the audit would use a dead pod.
  kubectl delete pod "$name" -n "$ns" \
    --ignore-not-found --wait=true --timeout=60s || return

  # Creation errors (bad image, quota, admission denial) are real failures now
  # that the name is known to be free, so they are no longer swallowed.
  kubectl run "$name" -n "$ns" \
    --image=nicolaka/netshoot \
    --restart=Never \
    --labels="app=netshoot-audit" \
    --command -- sleep 600 || return

  kubectl wait --for=condition=Ready pod/"$name" -n "$ns" --timeout=60s
}
|
|
|
|
#######################################
# Delete the netshoot probe pods from every audited namespace.
# Deletion is best effort: errors are ignored and the call does not wait for
# the pods to terminate.
# Arguments: none
#######################################
cleanup_netshoot() {
  # Keep the loop variable out of the global scope.
  local ns

  echo ""
  echo "Cleaning up netshoot pods..."
  for ns in tenant-alpha tenant-testfirma pieced-system; do
    # Intentionally silent and non-fatal so one failed delete does not stop
    # the remaining namespaces from being cleaned up.
    kubectl delete pod netshoot-audit -n "$ns" --ignore-not-found --wait=false 2>/dev/null || true
  done
  echo "Done."
}
|
|
|
|
# Clean up on exit
|
|
# Registered for the shell's EXIT event, so the probe pods are removed whether
# the script finishes, calls `exit`, or is aborted by `set -e`.
trap cleanup_netshoot EXIT
|
|
|
|
#######################################
# Run a command inside the netshoot-audit pod of a namespace, discarding its
# output.
# Callers pass their own time limits (e.g. curl --connect-timeout,
# nslookup -timeout).
# Arguments:
#   $1    - namespace of the probe pod
#   $2... - command and arguments to execute in the pod
# Returns:
#   the exit status of `kubectl exec` (0 = the command succeeded)
# Exits:
#   2 if the probe pod is not in phase Running. Callers treat every non-zero
#   status as "traffic blocked", so a missing or finished pod would otherwise
#   turn into false PASS results for every block test.
#######################################
run_in() {
  local ns="$1"
  shift

  local phase
  phase="$(kubectl get pod netshoot-audit -n "$ns" \
    -o jsonpath='{.status.phase}' 2>/dev/null)" || phase=""
  if [[ "$phase" != "Running" ]]; then
    printf 'ERROR: probe pod netshoot-audit in %s is not Running (phase: %s); aborting audit.\n' \
      "$ns" "${phase:-unknown}" >&2
    exit 2
  fi

  kubectl exec -n "$ns" netshoot-audit -- "$@" >/dev/null 2>&1
}
|
|
|
|
# ============================================================================
# Start
# ============================================================================

# Banner with the UTC start time of the run.
printf '\n%s\n' "PieCed IT — Cilium Network Policy Audit"
printf 'Date: %s\n\n' "$(date -u '+%Y-%m-%d %H:%M:%S UTC')"

# Begin the Markdown report with its title and the table header; each test
# later appends one row to this table.
{
  printf '%s\n' "# Cilium Network Policy Audit Results"
  printf '\n'
  printf '%s\n' "| Test | From | To | Expected | Actual | Result |"
  printf '%s\n' "|------|------|----|----------|--------|--------|"
} > "$RESULTS_FILE"

# One probe pod per namespace under test.
log_header "Deploying audit pods"
deploy_netshoot tenant-alpha
deploy_netshoot tenant-testfirma
deploy_netshoot pieced-system
|
|
|
|
# ============================================================================
# SECTION 1: Tenant-to-Tenant Isolation
# ============================================================================
log_header "1. Tenant-to-Tenant Isolation"

# Each probe stores its exit status in rc without tripping `set -e`:
# rc stays 0 on success and receives the failing status otherwise.

# tenant-alpha → tenant-testfirma OpenClaw (port 18789)
rc=0
run_in tenant-alpha curl -s --connect-timeout 5 http://openclaw.tenant-testfirma.svc:18789 || rc=$?
log_result "Cross-tenant: alpha→testfirma:18789" "tenant-alpha" "openclaw.tenant-testfirma:18789" "block" "$rc"

# tenant-testfirma → tenant-alpha OpenClaw (port 18789)
rc=0
run_in tenant-testfirma curl -s --connect-timeout 5 http://openclaw.tenant-alpha.svc:18789 || rc=$?
log_result "Cross-tenant: testfirma→alpha:18789" "tenant-testfirma" "openclaw.tenant-alpha:18789" "block" "$rc"

# Cross-tenant on other OpenClaw ports
rc=0
run_in tenant-alpha curl -s --connect-timeout 5 http://openclaw.tenant-testfirma.svc:18793 || rc=$?
log_result "Cross-tenant: alpha→testfirma:18793" "tenant-alpha" "openclaw.tenant-testfirma:18793" "block" "$rc"

rc=0
run_in tenant-alpha curl -s --connect-timeout 5 http://openclaw.tenant-testfirma.svc:9090 || rc=$?
log_result "Cross-tenant: alpha→testfirma:9090" "tenant-alpha" "openclaw.tenant-testfirma:9090" "block" "$rc"
|
|
|
|
# ============================================================================
# SECTION 2: Tenant → Platform Services (must be blocked except LiteLLM)
# ============================================================================
log_header "2. Tenant → Platform Services"

# Shared probe settings for sections 2–5.
# --max-time bounds the whole request so that a peer which accepts the
# connection but never answers cannot stall the audit; --connect-timeout alone
# only limits the connection phase.
curl_probe=(curl -s --connect-timeout 5 --max-time 10)

# One OpenBao address for both the tenant (block) and the platform (allow)
# probe. The two probes used to point at different namespaces; a block probe
# aimed at a name that does not resolve "passes" without exercising any policy.
# Sharing the address lets the allow probe in section 5 act as its control.
# NOTE(review): value taken from the platform allow probe — confirm the
# namespace OpenBao actually runs in.
openbao_health_url="http://openbao.openbao.svc:8200/v1/sys/health"

# The Portal DB port speaks the PostgreSQL protocol, not HTTP, so an HTTP
# client reports failure even when the TCP connection is permitted. A plain
# TCP connect (nc -z) reflects what the network policy allows.
portal_db_probe=(nc -z -w 5 portal-db-rw.pieced-system.svc 5432)

# OpenBao
rc=0
run_in tenant-alpha "${curl_probe[@]}" "$openbao_health_url" || rc=$?
log_result "Tenant→OpenBao" "tenant-alpha" "openbao:8200" "block" "$rc"

# ZITADEL (direct svc, not via ingress)
rc=0
run_in tenant-alpha "${curl_probe[@]}" http://zitadel.zitadel.svc:8080/debug/healthz || rc=$?
log_result "Tenant→ZITADEL (svc)" "tenant-alpha" "zitadel:8080" "block" "$rc"

# Portal
rc=0
run_in tenant-alpha "${curl_probe[@]}" http://pieced-portal.pieced-system.svc:3000 || rc=$?
log_result "Tenant→Portal" "tenant-alpha" "pieced-portal:3000" "block" "$rc"

# Portal DB
rc=0
run_in tenant-alpha "${portal_db_probe[@]}" || rc=$?
log_result "Tenant→Portal DB" "tenant-alpha" "portal-db-rw:5432" "block" "$rc"

# ArgoCD
rc=0
run_in tenant-alpha "${curl_probe[@]}" -k https://argocd-server.argocd.svc:443 || rc=$?
log_result "Tenant→ArgoCD" "tenant-alpha" "argocd-server:443" "block" "$rc"

# ============================================================================
# SECTION 3: Tenant → K8s API Server (must be blocked)
# ============================================================================
log_header "3. Tenant → K8s API Server"

rc=0
run_in tenant-alpha "${curl_probe[@]}" -k https://kubernetes.default.svc:443/version || rc=$?
log_result "Tenant→K8s API" "tenant-alpha" "kubernetes.default:443" "block" "$rc"

# Also test from the other tenant
rc=0
run_in tenant-testfirma "${curl_probe[@]}" -k https://kubernetes.default.svc:443/version || rc=$?
log_result "Tenant→K8s API" "tenant-testfirma" "kubernetes.default:443" "block" "$rc"

# ============================================================================
# SECTION 4: Tenant → Allowed Paths (must succeed)
# ============================================================================
log_header "4. Tenant → Allowed Paths"

# DNS resolution
rc=0
run_in tenant-alpha nslookup -timeout=5 google.com || rc=$?
log_result "Tenant→DNS" "tenant-alpha" "kube-dns" "allow" "$rc"

# LiteLLM (adjust namespace if different — check your actual LiteLLM svc namespace)
# Based on .env.example: LITELLM_INTERNAL_URL=http://litellm.inference.svc:4000
rc=0
run_in tenant-alpha "${curl_probe[@]}" http://litellm.inference.svc:4000/health || rc=$?
log_result "Tenant→LiteLLM" "tenant-alpha" "litellm.inference:4000" "allow" "$rc"

# External HTTPS (world:443)
rc=0
run_in tenant-alpha "${curl_probe[@]}" https://httpbin.org/status/200 || rc=$?
log_result "Tenant→world:443" "tenant-alpha" "httpbin.org:443" "allow" "$rc"

# ============================================================================
# SECTION 5: Platform Pod → Platform Services (must succeed)
# ============================================================================
log_header "5. Platform → Platform Services"

# Platform → OpenBao (same address as the tenant block probe above)
rc=0
run_in pieced-system "${curl_probe[@]}" "$openbao_health_url" || rc=$?
log_result "Platform→OpenBao" "pieced-system" "openbao:8200" "allow" "$rc"

# Platform → ZITADEL
rc=0
run_in pieced-system "${curl_probe[@]}" http://zitadel.zitadel.svc:8080/debug/healthz || rc=$?
log_result "Platform→ZITADEL" "pieced-system" "zitadel:8080" "allow" "$rc"

# Platform → K8s API
rc=0
run_in pieced-system "${curl_probe[@]}" -k https://kubernetes.default.svc:443/version || rc=$?
log_result "Platform→K8s API" "pieced-system" "kubernetes.default:443" "allow" "$rc"

# Platform → LiteLLM
rc=0
run_in pieced-system "${curl_probe[@]}" http://litellm.inference.svc:4000/health || rc=$?
log_result "Platform→LiteLLM" "pieced-system" "litellm.inference:4000" "allow" "$rc"

# Platform → Portal DB (internal connectivity)
rc=0
run_in pieced-system "${portal_db_probe[@]}" || rc=$?
log_result "Platform→Portal DB" "pieced-system" "portal-db-rw:5432" "allow" "$rc"
|
|
|
|
# ============================================================================
# SECTION 6: Reverse — Tenant → Platform Pod (must be blocked)
# ============================================================================
log_header "6. Tenant → Platform Pods (reverse check)"

# Tenant → operator
rc=0
run_in tenant-alpha curl -s --connect-timeout 5 http://pieced-operator.pieced-system.svc:8080 || rc=$?
log_result "Tenant→Operator" "tenant-alpha" "pieced-operator:8080" "block" "$rc"

# ============================================================================
# SECTION 7: Metadata / Edge Cases
# ============================================================================
log_header "7. Edge Cases"

# Cloud metadata endpoint (should be unreachable on bare metal, but verify)
rc=0
run_in tenant-alpha curl -s --connect-timeout 3 http://169.254.169.254/latest/meta-data/ || rc=$?
log_result "Tenant→metadata endpoint" "tenant-alpha" "169.254.169.254" "block" "$rc"
|
|
|
|
# ============================================================================
# Summary
# ============================================================================

# Same banner layout as the section headers, followed by the pass/fail tallies.
log_header "SUMMARY"
echo ""
echo -e " ${GRN}Passed: $PASS${RST}"
echo -e " ${RED}Failed: $FAIL${RST}"
echo ""

# Append the totals and the standing notes to the Markdown report.
{
  printf '\n%s\n\n' "## Summary"
  printf '%s\n' "- **Passed**: $PASS"
  printf '%s\n' "- **Failed**: $FAIL"
  printf '%s\n' "- **Date**: $(date -u '+%Y-%m-%d %H:%M:%S UTC')"
  printf '\n%s\n\n' "## Notes"
  printf '%s\n' '- DNS exfiltration: DNS is allowed for tenants (required for egress). DNS tunneling is a theoretical risk — acceptable for pilot. Consider Cilium DNS-aware policies post-pilot.'
  printf '%s\n' '- LiteLLM namespace: Tests assume `litellm.inference.svc:4000`. Adjust if your LiteLLM is in a different namespace.'
  printf '%s\n' '- K8s API blocking: If this test fails, you need an explicit CiliumClusterwideNetworkPolicy denying egress to the API server CIDR from tenant namespaces. The API server is typically at the host IP or 10.96.0.1, not in a pod namespace, so namespace-based deny may not cover it.'
} >> "$RESULTS_FILE"

echo "Full results written to: $RESULTS_FILE"

# Any failed test makes the whole audit exit non-zero.
if ((FAIL > 0)); then
  echo ""
  echo -e "${RED}⚠ $FAIL test(s) failed — review results and fix network policies.${RST}"
  exit 1
fi
|