From e2b5936c2c6877061f3f379ff243fb8deedc3c84 Mon Sep 17 00:00:00 2001
From: Mike Stankavich
Date: Wed, 10 Jun 2026 12:35:15 -0500
Subject: [PATCH] fix(backend): let operator-set RESEND_API_KEY survive ArgoCD sync
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Preview + prod email (org invites, password resets) silently failed: the
backend logged "API key is invalid" from Resend and returned 201/200 anyway
(send is best-effort, non-fatal), so the UI showed success but nothing was
delivered.

Root cause: the chart rendered RESEND_API_KEY unconditionally from
.Values.secrets.resendApiKey (default ""), so ArgoCD owned
/data/RESEND_API_KEY and rewrote it to empty on every sync. The Railway->GKE
migration never carried a key into the chart, and any out-of-band
operator-set value would be reverted on the next sync. An empty key is what
Resend rejects as invalid. (Confirmed both trakrf-preview and trakrf-prod
secrets held an empty RESEND_API_KEY.)

Fix mirrors the JWT_SECRET TRA-860 pattern:
- secret.yaml omits RESEND_API_KEY when empty, so ArgoCD never manages the
  key and an out-of-band value persists across all sync paths.
- trakrf-backend Application adds /data/RESEND_API_KEY to the
  ignoreDifferences jsonPointers (belt-and-suspenders, per-env so prod
  inherits the carve-out at cutover).

The real key is still set out-of-band (operator kubectl today; ESO + GCP
Secret Manager later, TRA-375).

Verified: helm template omits RESEND_API_KEY when unset and includes it when
set; root app renders both /data/JWT_SECRET + /data/RESEND_API_KEY pointers
per env. (argocd/root helm lint failure is pre-existing on main — multi-doc
separator quirk — not from this change.)

Co-Authored-By: Claude Opus 4.8 (1M context) --- argocd/root/templates/trakrf-backend.yaml | 18 ++++++++++-------- helm/trakrf-backend/templates/secret.yaml | 14 ++++++++++++++ 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/argocd/root/templates/trakrf-backend.yaml b/argocd/root/templates/trakrf-backend.yaml index d6c5fed..4a5aa81 100644 --- a/argocd/root/templates/trakrf-backend.yaml +++ b/argocd/root/templates/trakrf-backend.yaml @@ -43,17 +43,19 @@ {{- end }} {{- $values := printf "%s%s" $base $ingress }} {{- /* - JWT_SECRET is a real, externally-set value (kubectl-managed in-cluster, not in - git — no ESO/sealed-secret ceremony today). The chart's secret.yaml renders the - placeholder default "change-me"; without this carve-out ArgoCD selfHeal reverts - the real secret → the backend's TRA-860/#428 fail-fast guard refuses to boot. - ignoreDifferences (diff) + RespectIgnoreDifferences (sync) make the operator-set - JWT_SECRET stick across syncs. Applies per-env, so prod inherits the carve-out - at cutover (its real secret must be set the same way — see TRA-375). + JWT_SECRET and RESEND_API_KEY are real, externally-set values (kubectl-managed + in-cluster, not in git — no ESO/sealed-secret ceremony today). The chart's + secret.yaml omits each when empty/placeholder; without this carve-out ArgoCD + selfHeal reverts the operator-set values on sync. For JWT that trips the + backend's TRA-860/#428 fail-fast boot guard; for RESEND it silently empties the + key so email send fails with Resend "API key is invalid" (the Railway→GKE + migration miss). ignoreDifferences (diff) + RespectIgnoreDifferences (sync) make + both stick across syncs. Applies per-env, so prod inherits the carve-out at + cutover (its real secrets must be set the same way — see TRA-375). TODO durable: replace the manual secret source with External Secrets + GCP Secret Manager (TRA-375 follow-up); the carve-out stays correct either way. 
*/ -}} -{{- $ignore := printf "- group: \"\"\n kind: Secret\n name: trakrf-backend\n namespace: trakrf-%s\n jsonPointers:\n - /data/JWT_SECRET\n" $env }} +{{- $ignore := printf "- group: \"\"\n kind: Secret\n name: trakrf-backend\n namespace: trakrf-%s\n jsonPointers:\n - /data/JWT_SECRET\n - /data/RESEND_API_KEY\n" $env }} --- {{- include "trakrf.application" (dict "name" (printf "trakrf-backend-%s" $env) diff --git a/helm/trakrf-backend/templates/secret.yaml b/helm/trakrf-backend/templates/secret.yaml index 05e98bf..17a1072 100644 --- a/helm/trakrf-backend/templates/secret.yaml +++ b/helm/trakrf-backend/templates/secret.yaml @@ -20,5 +20,19 @@ stringData: {{- if and .Values.secrets.jwtSecret (ne .Values.secrets.jwtSecret "change-me") }} JWT_SECRET: {{ .Values.secrets.jwtSecret | quote }} {{- end }} + {{- /* + RESEND_API_KEY follows the same omit-when-empty rule as JWT_SECRET. The real + key is set out-of-band (operator kubectl today; ESO + GCP Secret Manager + later — TRA-375). If the chart rendered "", ArgoCD would own + /data/RESEND_API_KEY and revert any operator-set value to empty on every sync + — which is exactly what silently broke preview + prod email after the + Railway→GKE migration (empty key → Resend "API key is invalid", logged as a + non-fatal warning so the UI still showed success). Omitting the key when empty + leaves ArgoCD unaware of it, so the out-of-band value persists. A matching + ignoreDifferences carve-out on /data/RESEND_API_KEY (see + argocd/root/templates/trakrf-backend.yaml) is belt-and-suspenders. + */}} + {{- if .Values.secrets.resendApiKey }} RESEND_API_KEY: {{ .Values.secrets.resendApiKey | quote }} + {{- end }} SENTRY_DSN: {{ .Values.secrets.sentryDsn | quote }}