diff --git a/clusters/_template/bootstrap-kit/11-powerdns.yaml b/clusters/_template/bootstrap-kit/11-powerdns.yaml index bbfa365b..d4241f1d 100644 --- a/clusters/_template/bootstrap-kit/11-powerdns.yaml +++ b/clusters/_template/bootstrap-kit/11-powerdns.yaml @@ -110,7 +110,21 @@ spec: # Helm post-install hook failed, HR FAILED 4× → terminal. # New deadline (14m) sits below the HR install.timeout cap of # 15m so Flux's remediation can still reclaim a true failure. - version: 1.2.2 + # 1.2.3 (Fix #144-followup, prov #37+#38 recurrence 2026-05-12): + # bumping activeDeadlineSeconds alone was insufficient — the Job + # hit BackoffLimitExceeded (NOT DeadlineExceeded) at ~10min + # because each container invocation curl'd a Service with empty + # Ready endpoints (powerdns Pods Pending behind a worker-capacity + # wedge that kept bp-cnpg's pdns-pg-1-initdb itself Pending). + # Container restartPolicy=OnFailure + backoffLimit=6 killed the + # Job long before activeDeadlineSeconds had any effect. Fix moves + # the wait-for-API loop INSIDE the container (restartPolicy=Never, + # bounded by new apiReadyTimeoutSeconds=600s) so one Pod owns + # the full 14m budget. Trace: in chroot prov #38, HR status + # message read "Helm install failed for release powerdns/powerdns + # with chart bp-powerdns@1.2.2: failed post-install: 1 error + # occurred: * job powerdns-zone-bootstrap failed: BackoffLimitExceeded". + version: 1.2.3 sourceRef: kind: HelmRepository name: bp-powerdns diff --git a/platform/powerdns/chart/Chart.yaml b/platform/powerdns/chart/Chart.yaml index 02afc7b7..b6c4b643 100644 --- a/platform/powerdns/chart/Chart.yaml +++ b/platform/powerdns/chart/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: v2 name: bp-powerdns -version: 1.2.2 +version: 1.2.3 description: | Catalyst-curated Blueprint wrapper for PowerDNS Authoritative. 
Carries Catalyst-specific values.yaml + templates (CNPG cluster, dnsdist @@ -15,6 +15,18 @@ description: | `helm dependency build` resolves it; values.yaml carries both the catalystBlueprint metadata block and the upstream subchart values. + 1.2.3 — Fix #144 recurrence (prov #37+#38 InstallFailed + BackoffLimitExceeded, 2026-05-12). Bumping activeDeadlineSeconds alone + (Fix #144) was insufficient: the post-install hook Job's container + exited within backoffLimit=6 (~10min wall-time) because curl against + http://powerdns:8081 hit a Service with empty Ready endpoints (powerdns + Deployment Pods Pending behind a CNPG initdb that itself waited for + worker capacity). The 14m deadline never got a chance — the Job died + at BackoffLimit. Fix moves the wait-for-API loop INSIDE the container + (restartPolicy: Never) so one Pod owns the full activeDeadlineSeconds + budget. New value apiReadyTimeoutSeconds (default 600s) bounds the + inner poll; surfaced via values.yaml per INVIOLABLE-PRINCIPLES #4. + Bumped to 1.2.0 — multi-zone bootstrap (issue #827, parent epic #825). A franchised Sovereign now supports N parent zones, NOT one. New values key `zones: []` declares the parent domains the operator diff --git a/platform/powerdns/chart/templates/zone-bootstrap-job.yaml b/platform/powerdns/chart/templates/zone-bootstrap-job.yaml index 8d5e2e37..33005ed7 100644 --- a/platform/powerdns/chart/templates/zone-bootstrap-job.yaml +++ b/platform/powerdns/chart/templates/zone-bootstrap-job.yaml @@ -145,13 +145,40 @@ metadata: spec: backoffLimit: {{ .Values.zoneBootstrap.backoffLimit | default 6 }} activeDeadlineSeconds: {{ .Values.zoneBootstrap.activeDeadlineSeconds | default 300 }} + # Fix #144-followup (recurrence on prov #37 + #38, 2026-05-12): + # The 14m activeDeadlineSeconds bumped by Fix #144 only helps if the + # Job's container is allowed to RUN to that deadline.
But this Job + # exited within ~6 backoffs (~10 minutes wall-time) and tripped + # `BackoffLimitExceeded` — NOT `DeadlineExceeded`. Each container + # invocation curl'd `http://powerdns:8081`, the in-cluster PowerDNS + # Service whose endpoints were empty because the powerdns Deployment + # Pods were Pending (worker CPU 99% requested; bp-cnpg `pdns-pg-1-initdb` + # also Pending behind same scheduling wedge; cluster-autoscaler in + # backoff after gitea/gitea-* PV affinity conflict). curl exited 7 + # (connection refused) → container restarted → 6 backoffs → Job dead + # at ~10 minutes, well before activeDeadlineSeconds=840s. + # + # The Fix: move the readiness wait INTO the container — one pod, one + # long inner loop. See `command:` below. Combined with + # `restartPolicy: Never` (no in-place container restarts; a failed + # Pod still counts against backoffLimit) so the inner loop owns the wait budget, + # bounded by activeDeadlineSeconds. backoffLimit kept at 6 to recover + # from genuinely transient pod failures (image-pull retry, node + # eviction) without papering over a stuck Deployment. template: metadata: labels: {{- include "bp-powerdns.labels" . | nindent 8 }} catalyst.openova.io/component: zone-bootstrap spec: - restartPolicy: OnFailure + # Fix #144-followup: switched OnFailure → Never. The inner + # poll-for-API-ready loop now owns the wait budget (bounded by + # activeDeadlineSeconds and the POWERDNS_API_READY_TIMEOUT_S env var). + # If the container itself crashes (image-pull retry, OS-level + # error), the Job's backoffLimit still triggers a new Pod — + # so transient failures recover. Steady-state retries for an + # un-Ready upstream stay inside the single Pod. + restartPolicy: Never serviceAccountName: powerdns-zone-bootstrap volumes: - name: tmp @@ -199,12 +226,84 @@ spec: secretKeyRef: name: powerdns-api-credentials key: api-key + # Fix #144-followup: inner-loop deadline for the wait-for-API + # poll.
Default 600s = 10 min; surfaced via values.yaml so + # operators on slower clusters can raise it without forking + # the chart. Sits below activeDeadlineSeconds (840s) so the + # zone-creation phase has ≥240s of headroom after the API + # comes Ready. + - name: POWERDNS_API_READY_TIMEOUT_S + value: {{ .Values.zoneBootstrap.apiReadyTimeoutSeconds | default 600 | quote }} command: - /bin/sh - -c - | set -eu + api="${POWERDNS_API}" + key="${POWERDNS_API_KEY}" + + # ─── Wait for PowerDNS API to come Ready ────────────────────── + # + # Fix #144-followup (recurrence on prov #37+#38, 2026-05-12): + # The Helm post-install hook fires the moment the `powerdns` + # Deployment + Service manifests apply — but the Service has + # no Ready endpoints until the powerdns Pods themselves come + # up, and THOSE wait on bp-cnpg's `pdns-pg-app` Secret which + # only materialises after `pdns-pg-1-initdb` Pod schedules, + # runs, and completes. On a fresh Sovereign with the worker + # under capacity pressure (gitea+harbor+keycloak racing for + # CPU) that whole chain can take >10 minutes — longer than + # the Job's 6-backoff retry budget. + # + # Approach: one container run, one inner poll loop. We retry + # every 10s for up to .Values.zoneBootstrap.apiReadyTimeoutSeconds + # (default 600s = 10min, well within activeDeadlineSeconds=840s + # budget so there's still headroom for the zone-creation loop + # below). Authenticated GET /api/v1/servers returns: + # 200 — server up; proceed to bootstrap + # 401/403 — server up but auth rejected (FATAL, no retry) + # anything else — keep waiting + # curl's exit codes: + # 7 — connection refused (Service endpoints empty) + # 28 — operation timeout (network blip / pod warming) + # Both are retryable. Anything terminal (DNS resolution fail, + # which would mean Service object missing) we surface after + # the deadline.
+ + ready_deadline_s="${POWERDNS_API_READY_TIMEOUT_S:-600}" + poll_interval_s=10 + started_at=$(date +%s) + echo "Waiting up to ${ready_deadline_s}s for PowerDNS API at ${api}/api/v1/servers" + while :; do + status=$(curl --silent --output /tmp/api-probe \ + --write-out '%{http_code}' \ + --max-time 5 \ + -H "X-API-Key: ${key}" \ + "${api}/api/v1/servers" 2>/dev/null || true) + waited=$(( $(date +%s) - started_at )) + case "${status}" in + 200) + echo " PowerDNS API ready (HTTP 200) after ${waited}s" + break + ;; + 401|403) + echo " PowerDNS API reachable but auth rejected (HTTP ${status}) — FATAL" + cat /tmp/api-probe 2>/dev/null || true + exit 1 + ;; + *) + # 000 = no HTTP response (curl prints it itself on refused/timeout, hence `|| true`); 5xx = upstream not ready + if [ "${waited}" -ge "${ready_deadline_s}" ]; then + echo " PowerDNS API did not become ready within ${ready_deadline_s}s (last status=${status}) — FATAL" + exit 1 + fi + ;; + esac + # waited is wall-clock elapsed time, so probe time (--max-time 5) counts against the deadline + sleep "${poll_interval_s}" + done + # Idempotent zone-creation loop. # # For each entry in .Values.zones (rendered into the @@ -232,9 +331,6 @@ spec: # one round trip; GET-then-POST is two. Same outcome, # half the cost. - api="${POWERDNS_API}" - key="${POWERDNS_API_KEY}" - create_zone() { local name="$1" local kind="$2" diff --git a/platform/powerdns/chart/values.yaml b/platform/powerdns/chart/values.yaml index ed3977c5..93c48803 100644 --- a/platform/powerdns/chart/values.yaml +++ b/platform/powerdns/chart/values.yaml @@ -540,3 +540,22 @@ zoneBootstrap: # leave Flux waiting forever; a Job that fits inside the HR cap lets # Flux's own remediation cycle reclaim the failure path). activeDeadlineSeconds: 840 + # apiReadyTimeoutSeconds — inner poll budget the container spends + # waiting for http://powerdns:8081/api/v1/servers to return HTTP 200 + # before giving up.
+ # + # Recurrence of #144 on prov #37+#38 (2026-05-12) traced to + # BackoffLimitExceeded, NOT DeadlineExceeded: each container invocation + # curl'd the in-cluster PowerDNS Service whose endpoints stayed empty + # (powerdns Pods Pending behind a worker-capacity wedge + slow CNPG + # initdb). curl exited 7 (connection refused), the container restarted, + # 6 backoffs (~10min wall-time) tripped the Job's backoffLimit well + # before activeDeadlineSeconds=840s. + # + # New posture: one container run, one long inner loop. This timeout + # bounds the inner poll; restartPolicy is `Never` (the container owns + # the wait budget). 600s default sits below activeDeadlineSeconds=840s + # so the zone-creation phase retains >=240s of headroom AFTER the API + # comes Ready. Operators on slower clusters can raise this (keep it below + # activeDeadlineSeconds) without forking the chart per docs/INVIOLABLE-PRINCIPLES.md #4. + apiReadyTimeoutSeconds: 600