From 0cd8e0d6468403c73a4b3d54860040335148088c Mon Sep 17 00:00:00 2001 From: Chris Henesy Date: Tue, 9 Jun 2026 09:02:01 -0400 Subject: [PATCH 01/21] =?UTF-8?q?test(failing):=20ci-checks=20retry-once?= =?UTF-8?q?=20recovers=20a=20flake=20+=20knob/non-numeric=20guards=20(TDD?= =?UTF-8?q?=200040=20=C2=A71,=20FR-15/NFR-4)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/transient-gate-resilience.test.sh | 120 ++++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 tests/transient-gate-resilience.test.sh diff --git a/tests/transient-gate-resilience.test.sh b/tests/transient-gate-resilience.test.sh new file mode 100644 index 0000000..448f0fc --- /dev/null +++ b/tests/transient-gate-resilience.test.sh @@ -0,0 +1,120 @@ +#!/usr/bin/env bash +# transient-gate-resilience.test.sh — eval for TDD 0040 (transient gate-failure +# resilience). Two transient gate-failure modes are made honest and non-fatal: +# +# Component 1 — ci-checks retry-once. On a ci-checks failure the gate re-runs +# the checks up to THROUGHLINE_CI_CHECKS_RETRIES (default 1) more times before +# declaring FAIL; the FIRST passing run wins, a recovered flake is logged (not +# silent), and only the initial run AND all retries failing is a real FAIL. +# RETRIES=0 restores the no-retry behavior; a non-numeric value default-warns. +# +# Component 2 — no-verdict → gate-unobservable. A review/verify gate subprocess +# that exits leaving NO parseable verdict line (no REVIEW_RESULT: / no +# VERIFY_RUNTIME:), REGARDLESS of exit code, is classified `gate-unobservable` +# (a resumable blocked halt) instead of a terminal `failed` — couldn't-observe +# is not observed-wrong (ADR 0006 / NFR-4). 
The classification is a +# gate-agnostic helper (_classify_gate_no_verdict) the review gate (_rework_loop +# in lib/gates.sh) drives; the verify-runtime gate's terminal-state write lives +# in lib/resume.sh's gate_one (OUTSIDE this TDD's declared ## Touched files), so +# §4 exercises the helper with gate=verify-runtime to pin the gate-agnostic +# classification the verify call site reuses. +# +# Component 3 — enum + render mirror. `gate-unobservable` is admitted by the +# closed FR-63 halt-cause enum with a resume-first next-action list (state.sh) +# and rendered without an unknown-cause warning (status.sh). +# +# Covers the TDD's Verification plan §1-§6, following the fixture pattern of +# tests/runtime-verify-resume.test.sh (§5/§6 enum + render) and +# tests/structural-classification-bound.test.sh (a stub `claude` review gate +# driving the real gate_one + _rework_loop). Stubs mean no model or tokens are +# needed; all subprocess exit codes + outputs are explicit fixtures. +# +# §1 ci-checks flaky-then-green → PASS (retry recovers; telemetry logged); RETRIES=0 → FAIL +# §2 ci-checks red-twice → real FAIL (no false PASS); RETRIES non-numeric → default-and-warn +# §3 review no-verdict → gate-unobservable (resumable), gate=review + stderr-tail detail +# §4 verify no-verdict → gate-unobservable (gate=verify-runtime) via the gate-agnostic helper +# §5 observed REVIEW_RESULT: BLOCK is UNTOUCHED (discriminator is verdict-presence) +# §6 enum membership + status.sh render (resumable=blocked; no unknown-cause warning) +# +# Run: bash tests/transient-gate-resilience.test.sh +set -uo pipefail +REPO="$(cd "$(dirname "$0")/.." 
&& pwd)" +IMPL="$REPO/scripts/implement.sh" +RESULTS="$(mktemp)"; export RESULTS +ok() { printf 'ok\n' >>"$RESULTS"; printf ' ok — %s\n' "$1"; } +bad() { printf 'fail\n' >>"$RESULTS"; printf ' FAIL — %s\n' "$1"; } + +ROOT="$(mktemp -d)"; trap 'rm -rf "$ROOT"' EXIT + +# =========================================================================== +# §1: ci-checks flaky-then-green → PASS. A stub ci-checks.sh fails on its first +# invocation and passes on the second (keyed off a counter file). With +# THROUGHLINE_CI_CHECKS_RETRIES=1 run_ci_checks PASSES and the gate log records a +# recovered-flake telemetry line; with RETRIES=0 the SAME stub FAILS (no retry). +echo "[§1] ci-checks flaky-then-green → PASS with retry; RETRIES=0 → FAIL (knob governs)" +( D="$ROOT/s1"; mkdir -p "$D"; cd "$D" || { bad "cd failed"; exit 0; } + export STATE_DIR="$D/state.d" STATE_STARTED_AT=1000 STATE_MODE="sequential" INTEGRATION="master" CHANGE="ci" LOGDIR="$D" + mkdir -p "$D/state.d"; TDDS=() + THROUGHLINE_SOURCE_ONLY=1 source "$IMPL" || { bad "source guard missing"; exit 0; } + # Stub ci-checks: fail on attempt 1, pass on attempt 2+ (counter file). The + # counter is reset per scenario so each run starts on the flaky first attempt. + cnt="$D/ci.count" + cat > "$D/ci-checks-stub.sh" </dev/null || echo 0) + 1 )); echo "\$n" > "$cnt" +echo "ci-checks stub invocation \$n" +[ "\$n" -ge 2 ] && exit 0 || exit 1 +EOF + chmod +x "$D/ci-checks-stub.sh" + export CI_CHECKS="$D/ci-checks-stub.sh" + + # RETRIES=1: initial run flakes, retry passes → PASS. + printf '0\n' > "$cnt"; : > "$D/r1.log" + THROUGHLINE_CI_CHECKS_RETRIES=1 run_ci_checks "$D/r1.log"; rc=$? 
+ [ "$rc" -eq 0 ] && ok "flaky-then-green run_ci_checks returns 0 with RETRIES=1" || bad "retry should recover the flake (got rc=$rc)" + grep -qiE 'passed on retry' "$D/r1.log" \ + && ok "the gate log records a recovered-flake telemetry line" || bad "recovered flake must be logged, not silent (NFR-4); log: $(cat "$D/r1.log")" + + # RETRIES=0: no retry — the same flaky stub FAILS on its single attempt. + printf '0\n' > "$cnt"; : > "$D/r0.log" + THROUGHLINE_CI_CHECKS_RETRIES=0 run_ci_checks "$D/r0.log"; rc0=$? + [ "$rc0" -ne 0 ] && ok "RETRIES=0 disables the retry (the knob governs it)" || bad "RETRIES=0 should NOT retry (got rc=$rc0)" +) || true + +# =========================================================================== +# §2: ci-checks red-twice → real FAIL (no false PASS). A stub that fails on EVERY +# invocation must FAIL even with the retry — retry only re-observes a one-off, it +# never masks a reproducible failure (NFR-4). A non-numeric RETRIES default-warns. +echo "[§2] ci-checks red-twice → real FAIL (retry never masks a reproducible failure); RETRIES non-numeric → default-and-warn" +( D="$ROOT/s2"; mkdir -p "$D"; cd "$D" || { bad "cd failed"; exit 0; } + export STATE_DIR="$D/state.d" STATE_STARTED_AT=1000 STATE_MODE="sequential" INTEGRATION="master" CHANGE="ci" LOGDIR="$D" + mkdir -p "$D/state.d"; TDDS=() + THROUGHLINE_SOURCE_ONLY=1 source "$IMPL" || { bad "source guard missing"; exit 0; } + # Stub that ALWAYS fails (a reproducible regression). + cat > "$D/ci-red.sh" <<'EOF' +#!/usr/bin/env bash +echo "ci-checks red (reproducible failure)"; exit 1 +EOF + chmod +x "$D/ci-red.sh" + export CI_CHECKS="$D/ci-red.sh" + + : > "$D/red.log" + THROUGHLINE_CI_CHECKS_RETRIES=1 run_ci_checks "$D/red.log"; rc=$? + [ "$rc" -ne 0 ] && ok "red-twice run_ci_checks returns non-zero (real FAIL, no false PASS)" || bad "a reproducible failure must FAIL even with retry (got rc=$rc)" + ! 
grep -qiE 'passed on retry' "$D/red.log" \ + && ok "no recovered-flake telemetry on a genuine FAIL" || bad "must NOT log a recovered flake when it really failed" + + # Non-numeric RETRIES → default-and-warn (still bounded; mirrors WATCH_MAX_SECS). + printf '0\n' > "$D/c2" + cat > "$D/ci-flaky2.sh" </dev/null || echo 0) + 1 )); echo "\$n" > "$D/c2" +[ "\$n" -ge 2 ] && exit 0 || exit 1 +EOF + chmod +x "$D/ci-flaky2.sh"; export CI_CHECKS="$D/ci-flaky2.sh" + : > "$D/warn.log" + warn="$(THROUGHLINE_CI_CHECKS_RETRIES=abc run_ci_checks "$D/warn.log" 2>&1 >/dev/null)"; rcw=$? + [ "$rcw" -eq 0 ] && ok "non-numeric RETRIES defaults to 1 (flaky-then-green still recovers)" || bad "non-numeric RETRIES should default to 1 and retry (got rc=$rcw)" + printf '%s' "$warn" | grep -qiE 'not numeric|falling back' \ + && ok "non-numeric RETRIES emits a default-and-warn diagnostic" || bad "non-numeric RETRIES should warn (got: '$warn')" +) || true From 51af30c4079bf88c6207f2253fd6afd901916c79 Mon Sep 17 00:00:00 2001 From: Chris Henesy Date: Tue, 9 Jun 2026 09:03:08 -0400 Subject: [PATCH 02/21] step(1): ci-checks retry-once in run_ci_checks (TDD 0040 Component 1, FR-15/NFR-4) On a ci-checks failure, re-run up to THROUGHLINE_CI_CHECKS_RETRIES (default 1) more times in the same worktree; the first passing run wins and a recovered flake is logged (not silent). RETRIES=0 restores no-retry; non-numeric default-and-warns. Signature unchanged so the gate_one call site is untouched. Co-Authored-By: Claude Opus 4.8 --- scripts/lib/gates.sh | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/scripts/lib/gates.sh b/scripts/lib/gates.sh index 5c6dd0f..4c1a496 100644 --- a/scripts/lib/gates.sh +++ b/scripts/lib/gates.sh @@ -544,7 +544,42 @@ _fresh_review_verdict() { # | grep -aE '^[`[:space:]]*REVIEW_RESULT:' \ | tail -1 } -run_ci_checks() { bash "$CI_CHECKS" >>"$1" 2>&1; } +# run_ci_checks — TDD 0040 §1 (FR-15, NFR-4). 
Run ci-checks.sh; on a +# non-zero exit, re-run the checks up to THROUGHLINE_CI_CHECKS_RETRIES (default 1) +# more times in the SAME worktree (sequential, no parallelism). The FIRST passing +# run wins (PASS); only the initial run AND every retry failing is a real FAIL — +# so a transient suite flake is re-observed, never guessed past (ADR 0006), and a +# reproducible regression still FAILs. A pass on retry writes a +# "passed on retry N (initial run flaked)" telemetry line to the gate log so a +# recovered flake is visible, not silent (NFR-4: honest about the recovery). +# THROUGHLINE_CI_CHECKS_RETRIES=0 restores the no-retry behavior (an escape hatch +# for a deterministic-suite project); a non-numeric value defaults-and-warns +# (mirrors the THROUGHLINE_WATCH_MAX_SECS validation pattern). The signature is +# unchanged ( only) so the gate_one call site is untouched. +run_ci_checks() { # + local log="$1" + local retries="${THROUGHLINE_CI_CHECKS_RETRIES:-1}" + case "$retries" in + ''|*[!0-9]*) + echo "warning: THROUGHLINE_CI_CHECKS_RETRIES='$retries' not numeric; falling back to 1" >&2 + retries=1 ;; + esac + local attempt=0 + while :; do + if bash "$CI_CHECKS" >>"$log" 2>&1; then + if [ "$attempt" -gt 0 ]; then + printf 'ci-checks: passed on retry %d (initial run flaked; recovered, NFR-4)\n' "$attempt" >> "$log" + fi + return 0 + fi + if [ "$attempt" -ge "$retries" ]; then + return 1 + fi + attempt=$((attempt + 1)) + printf 'ci-checks: attempt failed; re-running (retry %d of %d, THROUGHLINE_CI_CHECKS_RETRIES)\n' \ + "$attempt" "$retries" >> "$log" + done +} # _test_first_ok_range — the SHARED test-first # predicate (TDD 0038 §1 / FR-15a). 
Returns 0 iff `git log ..` # contains a commit subject matching `^test(failing)` case-insensitively From a20496b957078cce37db0d7b7c4802a6dfeb14c0 Mon Sep 17 00:00:00 2001 From: Chris Henesy Date: Tue, 9 Jun 2026 09:14:34 -0400 Subject: [PATCH 03/21] fix(step 1): add result-tally/exit-code epilogue so eval assertions are enforceable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The test file exited 0 unconditionally (no epilogue reading RESULTS), making every §1-§2 assertion non-enforceable. Add the standard PASS/FAIL tally + final [ "$FAIL" -eq 0 ] so a single failing assertion exits the script non-zero (FR-74 #1 fail-loud; lets the step-5 aggregator wire-in catch regressions). Co-Authored-By: Claude Opus 4.8 --- tests/transient-gate-resilience.test.sh | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/transient-gate-resilience.test.sh b/tests/transient-gate-resilience.test.sh index 448f0fc..37a75df 100644 --- a/tests/transient-gate-resilience.test.sh +++ b/tests/transient-gate-resilience.test.sh @@ -118,3 +118,15 @@ EOF printf '%s' "$warn" | grep -qiE 'not numeric|falling back' \ && ok "non-numeric RETRIES emits a default-and-warn diagnostic" || bad "non-numeric RETRIES should warn (got: '$warn')" ) || true + +# --- report ---------------------------------------------------------------- +# Fail loud (FR-74 #1): the result tally is what makes every assertion above +# enforceable — the final `[ "$FAIL" -eq 0 ]` sets the script's exit code, so a +# single `bad` makes `bash tests/transient-gate-resilience.test.sh` exit non-zero +# and the aggregator's `|| TGR_FAIL=1` (step 5) catches it. 
+echo +PASS="$(grep -c '^ok$' "$RESULTS" 2>/dev/null)"; PASS="${PASS:-0}" +FAIL="$(grep -c '^fail$' "$RESULTS" 2>/dev/null)"; FAIL="${FAIL:-0}" +rm -f "$RESULTS" +echo "=== transient-gate-resilience eval: $PASS passed, $FAIL failed ===" +[ "$FAIL" -eq 0 ] From 378eebf2040d5e3b38151135fb5cdf6347d1d98c Mon Sep 17 00:00:00 2001 From: Chris Henesy Date: Tue, 9 Jun 2026 09:27:45 -0400 Subject: [PATCH 04/21] =?UTF-8?q?test(failing):=20retries-exhausted=20FAIL?= =?UTF-8?q?=20must=20log=20an=20explicit=20FAILED-after-N=20line=20(TDD=20?= =?UTF-8?q?0040=20=C2=A71,=20NFR-4)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Genuine red→green for step 1: with the enforcement epilogue now present, this assertion exits the suite non-zero against the current implementation (which returns silently on exhaustion). Re-establishes the failing-test-first discipline the prior vacuous test(failing) commit lacked. Co-Authored-By: Claude Opus 4.8 --- tests/transient-gate-resilience.test.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/transient-gate-resilience.test.sh b/tests/transient-gate-resilience.test.sh index 37a75df..a061742 100644 --- a/tests/transient-gate-resilience.test.sh +++ b/tests/transient-gate-resilience.test.sh @@ -103,6 +103,11 @@ EOF [ "$rc" -ne 0 ] && ok "red-twice run_ci_checks returns non-zero (real FAIL, no false PASS)" || bad "a reproducible failure must FAIL even with retry (got rc=$rc)" ! grep -qiE 'passed on retry' "$D/red.log" \ && ok "no recovered-flake telemetry on a genuine FAIL" || bad "must NOT log a recovered flake when it really failed" + # NFR-4 honesty: a real FAIL must be as visible in the gate log as a recovery — + # record an explicit "FAILED after N attempt(s)" line on retry exhaustion so a + # reader can tell a retries-exhausted FAIL from a single-shot one. 
+ grep -qiE 'ci-checks: FAILED after [0-9]+ attempt' "$D/red.log" \ + && ok "retry exhaustion logs an explicit FAILED-after-N telemetry line" || bad "a retries-exhausted FAIL must be logged (NFR-4); log: $(cat "$D/red.log")" # Non-numeric RETRIES → default-and-warn (still bounded; mirrors WATCH_MAX_SECS). printf '0\n' > "$D/c2" From 589b1770f5a103737822f06277bddd43ed071b89 Mon Sep 17 00:00:00 2001 From: Chris Henesy Date: Tue, 9 Jun 2026 09:28:26 -0400 Subject: [PATCH 05/21] =?UTF-8?q?step(1):=20log=20an=20explicit=20FAILED-a?= =?UTF-8?q?fter-N=20line=20on=20ci-checks=20retry=20exhaustion=20(TDD=2000?= =?UTF-8?q?40=20=C2=A71,=20NFR-4)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A retries-exhausted real FAIL now records a telemetry line as visible as the recovered-flake line, so the gate log distinguishes a retries-exhausted FAIL from a single-shot one. Completes the genuine red→green for step 1. Co-Authored-By: Claude Opus 4.8 --- scripts/lib/gates.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/lib/gates.sh b/scripts/lib/gates.sh index 4c1a496..39b0f0b 100644 --- a/scripts/lib/gates.sh +++ b/scripts/lib/gates.sh @@ -573,6 +573,10 @@ run_ci_checks() { # return 0 fi if [ "$attempt" -ge "$retries" ]; then + # NFR-4 honesty: a retries-exhausted FAIL is recorded as explicitly as a + # recovery, so a reader can tell it apart from a single-shot failure. 
+ printf 'ci-checks: FAILED after %d attempt(s) (initial + %d retries; retries exhausted, real FAIL)\n' \ + "$((attempt + 1))" "$retries" >> "$log" return 1 fi attempt=$((attempt + 1)) From 0f210a7ad77cf796e3ad43bc9ab559ad474073c6 Mon Sep 17 00:00:00 2001 From: Chris Henesy Date: Tue, 9 Jun 2026 09:42:22 -0400 Subject: [PATCH 06/21] revert(step 1): back out ci-checks retry impl to re-derive it test-first MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The initial test(failing) commit (0cd8e0d) was vacuous — it lacked the enforcement epilogue, so it exited 0 against pre-impl code and never drove the core retry behavior with a genuine red. History cannot be rewritten (divergence guard), so re-derive honestly: with the epilogue now present, backing out the implementation makes the §1 core-retry assertions genuinely RED (suite exits non-zero), and the next commit re-implements to green. Co-Authored-By: Claude Opus 4.8 --- scripts/lib/gates.sh | 41 +---------------------------------------- 1 file changed, 1 insertion(+), 40 deletions(-) diff --git a/scripts/lib/gates.sh b/scripts/lib/gates.sh index 39b0f0b..5c6dd0f 100644 --- a/scripts/lib/gates.sh +++ b/scripts/lib/gates.sh @@ -544,46 +544,7 @@ _fresh_review_verdict() { # | grep -aE '^[`[:space:]]*REVIEW_RESULT:' \ | tail -1 } -# run_ci_checks — TDD 0040 §1 (FR-15, NFR-4). Run ci-checks.sh; on a -# non-zero exit, re-run the checks up to THROUGHLINE_CI_CHECKS_RETRIES (default 1) -# more times in the SAME worktree (sequential, no parallelism). The FIRST passing -# run wins (PASS); only the initial run AND every retry failing is a real FAIL — -# so a transient suite flake is re-observed, never guessed past (ADR 0006), and a -# reproducible regression still FAILs. A pass on retry writes a -# "passed on retry N (initial run flaked)" telemetry line to the gate log so a -# recovered flake is visible, not silent (NFR-4: honest about the recovery). 
-# THROUGHLINE_CI_CHECKS_RETRIES=0 restores the no-retry behavior (an escape hatch -# for a deterministic-suite project); a non-numeric value defaults-and-warns -# (mirrors the THROUGHLINE_WATCH_MAX_SECS validation pattern). The signature is -# unchanged ( only) so the gate_one call site is untouched. -run_ci_checks() { # - local log="$1" - local retries="${THROUGHLINE_CI_CHECKS_RETRIES:-1}" - case "$retries" in - ''|*[!0-9]*) - echo "warning: THROUGHLINE_CI_CHECKS_RETRIES='$retries' not numeric; falling back to 1" >&2 - retries=1 ;; - esac - local attempt=0 - while :; do - if bash "$CI_CHECKS" >>"$log" 2>&1; then - if [ "$attempt" -gt 0 ]; then - printf 'ci-checks: passed on retry %d (initial run flaked; recovered, NFR-4)\n' "$attempt" >> "$log" - fi - return 0 - fi - if [ "$attempt" -ge "$retries" ]; then - # NFR-4 honesty: a retries-exhausted FAIL is recorded as explicitly as a - # recovery, so a reader can tell it apart from a single-shot failure. - printf 'ci-checks: FAILED after %d attempt(s) (initial + %d retries; retries exhausted, real FAIL)\n' \ - "$((attempt + 1))" "$retries" >> "$log" - return 1 - fi - attempt=$((attempt + 1)) - printf 'ci-checks: attempt failed; re-running (retry %d of %d, THROUGHLINE_CI_CHECKS_RETRIES)\n' \ - "$attempt" "$retries" >> "$log" - done -} +run_ci_checks() { bash "$CI_CHECKS" >>"$1" 2>&1; } # _test_first_ok_range — the SHARED test-first # predicate (TDD 0038 §1 / FR-15a). 
Returns 0 iff `git log ..` # contains a commit subject matching `^test(failing)` case-insensitively From 247dc330df327d62e9bec29a7e5f3f6eea30005e Mon Sep 17 00:00:00 2001 From: Chris Henesy Date: Tue, 9 Jun 2026 09:42:56 -0400 Subject: [PATCH 07/21] step(1): ci-checks retry-once re-derived from the now-genuinely-failing eval (TDD 0040 Component 1, FR-15/NFR-4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Re-implements run_ci_checks retry-once after the test-first backout (0f210a7): the §1 core-retry assertions were genuinely RED against the backed-out code and this commit greens them. On a ci-checks failure, re-run up to THROUGHLINE_CI_CHECKS_RETRIES (default 1) more times; first pass wins, a recovered flake and a retries-exhausted FAIL are both logged explicitly (NFR-4). RETRIES=0 disables retry; non-numeric default-and-warns. Signature unchanged. Co-Authored-By: Claude Opus 4.8 --- scripts/lib/gates.sh | 42 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 41 insertions(+), 1 deletion(-) diff --git a/scripts/lib/gates.sh b/scripts/lib/gates.sh index 5c6dd0f..9c48bb9 100644 --- a/scripts/lib/gates.sh +++ b/scripts/lib/gates.sh @@ -544,7 +544,47 @@ _fresh_review_verdict() { # | grep -aE '^[`[:space:]]*REVIEW_RESULT:' \ | tail -1 } -run_ci_checks() { bash "$CI_CHECKS" >>"$1" 2>&1; } +# run_ci_checks — TDD 0040 §1 (FR-15, NFR-4). Run ci-checks.sh; on a +# non-zero exit, re-run the checks up to THROUGHLINE_CI_CHECKS_RETRIES (default 1) +# more times in the SAME worktree (sequential, no parallelism). The FIRST passing +# run wins (PASS); only the initial run AND every retry failing is a real FAIL — +# so a transient suite flake is re-observed, never guessed past (ADR 0006), and a +# reproducible regression still FAILs. 
A pass on retry writes a +# "passed on retry N (initial run flaked)" telemetry line to the gate log so a +# recovered flake is visible, not silent; a retries-exhausted FAIL writes an +# equally explicit "FAILED after N attempt(s)" line (NFR-4: honest both ways). +# THROUGHLINE_CI_CHECKS_RETRIES=0 restores the no-retry behavior (an escape hatch +# for a deterministic-suite project); a non-numeric value defaults-and-warns +# (mirrors the THROUGHLINE_WATCH_MAX_SECS validation pattern). The signature is +# unchanged ( only) so the gate_one call site is untouched. +run_ci_checks() { # + local log="$1" + local retries="${THROUGHLINE_CI_CHECKS_RETRIES:-1}" + case "$retries" in + ''|*[!0-9]*) + echo "warning: THROUGHLINE_CI_CHECKS_RETRIES='$retries' not numeric; falling back to 1" >&2 + retries=1 ;; + esac + local attempt=0 + while :; do + if bash "$CI_CHECKS" >>"$log" 2>&1; then + if [ "$attempt" -gt 0 ]; then + printf 'ci-checks: passed on retry %d (initial run flaked; recovered, NFR-4)\n' "$attempt" >> "$log" + fi + return 0 + fi + if [ "$attempt" -ge "$retries" ]; then + # NFR-4 honesty: a retries-exhausted FAIL is recorded as explicitly as a + # recovery, so a reader can tell it apart from a single-shot failure. + printf 'ci-checks: FAILED after %d attempt(s) (initial + %d retries; retries exhausted, real FAIL)\n' \ + "$((attempt + 1))" "$retries" >> "$log" + return 1 + fi + attempt=$((attempt + 1)) + printf 'ci-checks: attempt failed; re-running (retry %d of %d, THROUGHLINE_CI_CHECKS_RETRIES)\n' \ + "$attempt" "$retries" >> "$log" + done +} # _test_first_ok_range — the SHARED test-first # predicate (TDD 0038 §1 / FR-15a). 
Returns 0 iff `git log ..` # contains a commit subject matching `^test(failing)` case-insensitively From 44face7620ad89c77304b219cf09eeaf4ee40916 Mon Sep 17 00:00:00 2001 From: Chris Henesy Date: Tue, 9 Jun 2026 09:57:17 -0400 Subject: [PATCH 08/21] =?UTF-8?q?test(failing):=20gate-unobservable=20enum?= =?UTF-8?q?=20membership=20+=20resume-first=20action=20+=20status=20render?= =?UTF-8?q?=20(TDD=200040=20=C2=A76,=20Component=203)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.8 --- tests/transient-gate-resilience.test.sh | 48 +++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tests/transient-gate-resilience.test.sh b/tests/transient-gate-resilience.test.sh index a061742..1478530 100644 --- a/tests/transient-gate-resilience.test.sh +++ b/tests/transient-gate-resilience.test.sh @@ -124,6 +124,54 @@ EOF && ok "non-numeric RETRIES emits a default-and-warn diagnostic" || bad "non-numeric RETRIES should warn (got: '$warn')" ) || true +# =========================================================================== +# §6: enum membership + status.sh render (Component 3). set_halt_cause +# gate-unobservable returns 0 and writes the cause; the first next-action +# begins with `resume` (the resumable marker _resume_from + status.sh +# --check-paused key on); a value NOT in the closed enum still returns 1 (the +# addition is what admits gate-unobservable, not a wildcard). status.sh +# --check-paused surfaces it resumable=blocked, and the full render emits no +# unknown-cause warning. 
+echo "[§6] gate-unobservable: closed-enum membership, resume-first action, status.sh render" +( D="$ROOT/s6"; mkdir -p "$D/state.d"; cd "$D" || { bad "cd failed"; exit 0; } + export STATE_DIR="$D/state.d" STATE_STARTED_AT=1000 STATE_MODE="sequential" INTEGRATION="master" CHANGE="ci" LOGDIR="$D" + TDDS=() + THROUGHLINE_SOURCE_ONLY=1 source "$IMPL" || { bad "source guard missing"; exit 0; } + _next_actions_for_cause gate-unobservable >/dev/null 2>&1 \ + && ok "_next_actions_for_cause admits gate-unobservable" || bad "gate-unobservable should be enumerated" + acts="$(_next_actions_for_cause gate-unobservable 2>/dev/null)" + printf '%s' "$acts" | grep -qE '^resume' \ + && ok "gate-unobservable's first next-action begins with resume" || bad "first next-action must begin with resume (got '$acts')" + # The action labels must be comma-free per element so the CSV round-trips. + _write_tdd_fragment 0040-x 40 docs/tdd/0040-x.md 1 blocked review 1000 1000 "feat/0040-x" "" log "" "" "build,test-first,verify,verify-runtime" "" "" "" "" "" "" "" "" "" "" "" "" + set_halt_cause 0040-x gate-unobservable review "timeout: failed to run command 'claude': No such file or directory" 2>/dev/null; rc=$? + [ "$rc" -eq 0 ] && ok "set_halt_cause gate-unobservable returns 0" || bad "set_halt_cause should accept gate-unobservable (got rc=$rc)" + hc="$(_read_fragment_field "$STATE_DIR/0040-x.json" halt_cause)" + [ "$hc" = "gate-unobservable" ] && ok "halt_cause written = gate-unobservable" || bad "halt_cause should be gate-unobservable (got '$hc')" + # Negative: an unknown cause still returns 1 (the enum is still closed). + set_halt_cause 0040-x not-a-real-cause-xyz review "" 2>/dev/null; rc2=$? 
+ [ "$rc2" -ne 0 ] && ok "an unknown cause still returns non-zero (enum stays closed)" || bad "unknown cause must return non-zero" +) || true + +echo "[§6b] status.sh surfaces gate-unobservable as resumable=blocked with no unknown-cause warning" +( D="$ROOT/s6b"; mkdir -p "$D/state.d"; cd "$D" || { bad "cd failed"; exit 0; } + export STATE_DIR="$D/state.d" STATE_STARTED_AT=1000 STATE_MODE="sequential" INTEGRATION="master" CHANGE="ci" LOGDIR="$D" + TDDS=() + THROUGHLINE_SOURCE_ONLY=1 source "$IMPL" || { bad "source guard missing"; exit 0; } + printf '{"schema":1,"started_at":1000,"updated_at":1001,"pid":1,"state":"blocked","total":1,"completed":0,"failed":0,"blocked":1,"skipped":0,"paused":0}\n' > "$D/state.d/run.json" + _write_tdd_fragment 0040-x 40 docs/tdd/0040-x.md 1 blocked review 1000 1000 "feat/0040-x" "" log "" "" "build,test-first,verify,verify-runtime" "" "" "" "" "" "" "" "" "" "" "" "" + set_halt_cause 0040-x gate-unobservable review "timeout: No such file or directory" 2>/dev/null + cp="$(bash "$REPO/scripts/status.sh" --logdir "$D" --check-paused 2>&1)" + printf '%s' "$cp" | grep -qE 'slug=0040-x .*cause=gate-unobservable resumable=blocked' \ + && ok "--check-paused surfaces cause=gate-unobservable resumable=blocked" || bad "should surface gate-unobservable resumable=blocked (got: '$cp')" + out="$(bash "$REPO/scripts/status.sh" --logdir "$D" 2>&1)" + printf '%s' "$out" | grep -qi 'unknown halt_cause' \ + && bad "status.sh must NOT warn unknown-cause for gate-unobservable (got: $out)" \ + || ok "full render emits no unknown-cause fallback warning" + printf '%s' "$out" | grep -q 'gate-unobservable' \ + && ok "the gate-unobservable cause label appears in the halt render" || bad "render should name gate-unobservable (got: $out)" +) || true + # --- report ---------------------------------------------------------------- # Fail loud (FR-74 #1): the result tally is what makes every assertion above # enforceable — the final `[ "$FAIL" -eq 0 ]` sets the script's 
exit code, so a From 93a46ad0223cbeca107728d039b2a8131af76563 Mon Sep 17 00:00:00 2001 From: Chris Henesy Date: Tue, 9 Jun 2026 09:58:17 -0400 Subject: [PATCH 09/21] step(2): add gate-unobservable to the closed halt-cause enum + status render mirror (TDD 0040 Component 3, FR-57/NFR-4) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit state.sh _next_actions_for_cause gains a gate-unobservable arm with a resume-first action list (no revision precondition — a no-verdict gate is safe to re-run), admitting it to the closed FR-63 enum so set_halt_cause accepts it and the blocked fragment is auto-resumable via _resume_from's blocked arm. status.sh _halt_cause_known gains the mirror so it renders without an unknown-cause warning. No schema change. Co-Authored-By: Claude Opus 4.8 --- scripts/lib/state.sh | 11 +++++++++++ scripts/status.sh | 1 + 2 files changed, 12 insertions(+) diff --git a/scripts/lib/state.sh b/scripts/lib/state.sh index 6a7a4a6..c3c769d 100644 --- a/scripts/lib/state.sh +++ b/scripts/lib/state.sh @@ -935,6 +935,17 @@ _next_actions_for_cause() { # is revised + merged (the §3 verify-plan-unrevised guard enforces the # precondition). Mirrors structural-finding's resume-after-revision shape. echo "resume (re-run runtime-verify against the revised verification plan),revise the TDD's ## Verification plan via /tdd-author" ;; + gate-unobservable) + # TDD 0040 §3 (FR-57, NFR-4): a review/runtime-verify gate SUBPROCESS that + # exited leaving NO parseable verdict line (crash, exec failure, empty + # output) is couldn't-observe, not observed-wrong (ADR 0006: a missing + # verdict is the absence of an artifact, so it cannot BE a verdict). The + # FIRST element begins with `resume`, the machine-readable marker + # status.sh --check-paused and _resume_from's blocked arm key on. 
Unlike + # verify-unobservable (which needs a verification-plan revision first), a + # no-verdict gate is genuinely safe to re-run with no operator intent — the + # gate simply could not run — so the resume needs no revision precondition. + echo "resume (re-runs the gate),see the gate log for why the gate emitted no verdict" ;; design-escalation) echo "revise TDD via /tdd-author,/adr-new if a constraint is being challenged" ;; external-blocker) diff --git a/scripts/status.sh b/scripts/status.sh index 31f12c0..28f5d2c 100755 --- a/scripts/status.sh +++ b/scripts/status.sh @@ -200,6 +200,7 @@ _halt_cause_known() { # design-escalation|external-blocker) return 0 ;; resume-blocked-integration-conflict) return 0 ;; # TDD 0031 §3c (mirrors state.sh enum) verify-unobservable) return 0 ;; # TDD 0035 §1 (mirrors state.sh enum); FR-64 renders it without the unknown-cause warning + gate-unobservable) return 0 ;; # TDD 0040 §3 (mirrors state.sh enum); a no-verdict gate renders without the unknown-cause warning *) return 1 ;; esac } From 00fd47e2ccc2e5821052409fd509c6556801e041 Mon Sep 17 00:00:00 2001 From: Chris Henesy Date: Tue, 9 Jun 2026 10:14:19 -0400 Subject: [PATCH 10/21] =?UTF-8?q?test(failing):=20review/verify=20no-verdi?= =?UTF-8?q?ct=20=E2=86=92=20gate-unobservable;=20observed=20BLOCK=20untouc?= =?UTF-8?q?hed=20(TDD=200040=20=C2=A73-=C2=A75,=20Component=202)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.8 --- tests/transient-gate-resilience.test.sh | 158 ++++++++++++++++++++++++ 1 file changed, 158 insertions(+) diff --git a/tests/transient-gate-resilience.test.sh b/tests/transient-gate-resilience.test.sh index 1478530..86a3362 100644 --- a/tests/transient-gate-resilience.test.sh +++ b/tests/transient-gate-resilience.test.sh @@ -124,6 +124,164 @@ EOF && ok "non-numeric RETRIES emits a default-and-warn diagnostic" || bad "non-numeric RETRIES should warn (got: '$warn')" ) || true +# 
=========================================================================== +# tgr_setup_review_repo — git repo + scope-declaring TDD + a state fragment +# + a stub `claude` acting as BOTH the review gate (cats $CTL/review.out, exits +# $CTL/review.rc) and the rework model (runs $CTL/do_rework). Gates 1-3 are marked +# done so gate_one runs ONLY the review gate + its bounded rework loop. Mirrors +# tests/structural-classification-bound.test.sh::scb_setup_repo. Leaves PWD in the +# repo. The caller exports STATE_DIR etc. + sources $IMPL first. +tgr_setup_review_repo() { # + local d="$1"; mkdir -p "$d/ctl" "$d/bin" + cd "$d" || return 1 + git init -q -b master; git config user.email t@t.t; git config user.name t + mkdir -p src docs/tdd + printf 'ctl/\nbin/\n' > .gitignore + printf 'orig\n' > src/a.txt + cat > docs/tdd/0099-fix.md <<'EOF' +# TDD 0099: fixture +Status: draft +PRD refs: 1 + +## Touched files +- `src/a.txt` — the in-scope file + +## Expected diff size +- `src/a.txt` — ~50 lines added +EOF + git add -A; git commit -qm "build start" >/dev/null + printf '0\n' > "$d/ctl/review.rc" # default: review subprocess exits 0 + cat > "$d/bin/claude" </dev/null + exit "\$(cat "$d/ctl/review.rc" 2>/dev/null || echo 0)" +fi +echo "BATCH_RESULT: OK"; exit 0 +EOF + chmod +x "$d/bin/claude" + export PATH="$d/bin:$PATH" + export RTMPL="$REPO/scripts/review-prompt.md" RWTMPL="$REPO/scripts/rework-prompt.md" + export REVIEW_MODEL="" REBUILD=0 BASE=master + export THROUGHLINE_GATE_RETRIES=1 THROUGHLINE_GATE_BACKOFF_BASE=0 + export THROUGHLINE_REQUIRE_TEST_FIRST=0 THROUGHLINE_REQUIRE_RUNTIME_VERIFY=0 + RESUME_GATES_DONE_0099_fix="build,test-first,verify,verify-runtime" + export RESUME_GATES_DONE_0099_fix + _write_tdd_fragment 0099-fix 99 docs/tdd/0099-fix.md 1 reviewing review \ + 1000 1000 "feat/0099-fix" "" "log" "" "" "build,test-first,verify,verify-runtime" "" "" "" "" "" "" "" +} +# Commit build output past build-start so the consolidated review scope is +# non-empty (the 
empty-scope guard fails closed on a HEAD..HEAD scope). +tgr_build_output() { printf 'build-output\n' >> src/a.txt; git add -A; git commit -qm "build: simulated output" >/dev/null; } + +# §3: review no-verdict → gate-unobservable (resumable). The stub review +# subprocess exits rc=1 emitting NO REVIEW_RESULT: line plus a stderr line +# mimicking the `timeout … No such file` exec error. The fragment must record +# halt_cause=gate-unobservable (NOT failed/null), halt_cause_detail naming `review` +# + the stderr tail, status=blocked, and halt_next_actions beginning with resume — +# couldn't-observe, not observed-wrong (ADR 0006 / NFR-4). +echo "[§3] review subprocess emits NO REVIEW_RESULT → gate-unobservable (resumable, gate=review, stderr-tail detail)" +( D="$ROOT/s3"; mkdir -p "$D/state.d" + export STATE_DIR="$D/state.d" STATE_STARTED_AT=1000 STATE_MODE="sequential" + export INTEGRATION="master" CHANGE="ci" LOGDIR="$D" MAINREPO="$D/repo" + TDDS=() + THROUGHLINE_SOURCE_ONLY=1 source "$IMPL" || { bad "source guard missing"; exit 0; } + tgr_setup_review_repo "$D/repo" || { bad "setup failed"; exit 0; } + BS="$(git rev-parse HEAD)"; tgr_build_output + # No-verdict review: emit the exec-error tail to stdout (captured to the gate + # log by _claude_call) and exit non-zero, with NO REVIEW_RESULT line. + printf "timeout: failed to run command 'claude': No such file or directory\n" > "$D/repo/ctl/review.out" + printf '1\n' > "$D/repo/ctl/review.rc" + : > "$D/s3.log" + st="$(gate_one docs/tdd/0099-fix.md "$BS" "$D/s3.log")"; rc=$? 
+ F="$STATE_DIR/0099-fix.json" + [ "$rc" -ne 0 ] && ok "gate_one does not clear on a no-verdict review" || bad "no-verdict review should not converge (rc=$rc)" + hc="$(_read_fragment_field "$F" halt_cause)" + [ "$hc" = "gate-unobservable" ] && ok "review no-verdict → halt_cause=gate-unobservable" || bad "halt_cause should be gate-unobservable, NOT failed/null (got '$hc')" + stt="$(sed -n 's/.*"status":"\([^"]*\)".*/\1/p' "$F" | head -1)" + [ "$stt" = "blocked" ] && ok "fragment ends at status=blocked (resumable), not failed" || bad "status should be blocked (got '$stt')" + det="$(_read_fragment_field "$F" halt_cause_detail)" + printf '%s' "$det" | grep -q 'review' \ + && ok "halt_cause_detail names the gate (review)" || bad "detail should name review (got '$det')" + printf '%s' "$det" | grep -qi 'No such file or directory' \ + && ok "halt_cause_detail carries the captured stderr tail" || bad "detail should carry the stderr tail (got '$det')" + acts="$(sed -n 's/.*\("halt_next_actions":\[[^]]*\]\).*/\1/p' "$F" | head -1)" + printf '%s' "$acts" | grep -qE '(\[|,)"resume' \ + && ok "halt_next_actions begins with a resume action (auto-resumable)" || bad "halt_next_actions should begin with resume (got '$acts')" +) || true + +# §4: verify no-verdict → gate-unobservable (gate=verify-runtime). The verify-gate +# terminal-state write lives in lib/resume.sh's gate_one (OUTSIDE this TDD's +# declared ## Touched files), so this exercises the gate-agnostic classifier the +# verify call site reuses: _classify_gate_no_verdict verify-runtime +# must record the SAME resumable gate-unobservable halt with gate=verify-runtime. 
+echo "[§4] verify-runtime no-verdict → gate-unobservable via the gate-agnostic helper (gate=verify-runtime)" +( D="$ROOT/s4"; mkdir -p "$D/state.d"; cd "$D" || { bad "cd failed"; exit 0; } + export STATE_DIR="$D/state.d" STATE_STARTED_AT=1000 STATE_MODE="sequential" INTEGRATION="master" CHANGE="ci" LOGDIR="$D" + TDDS=() + THROUGHLINE_SOURCE_ONLY=1 source "$IMPL" || { bad "source guard missing"; exit 0; } + command -v _classify_gate_no_verdict >/dev/null 2>&1 \ + && ok "_classify_gate_no_verdict helper is defined (gate-agnostic)" || bad "_classify_gate_no_verdict should exist" + _write_tdd_fragment 0040-v 40 docs/tdd/0040-v.md 1 verifying verify-runtime 1000 1000 "feat/0040-v" "" log "" "" "build,test-first,verify" "" "" "" "" "" "" "" "" "" "" "" "" + _classify_gate_no_verdict 0040-v verify-runtime "timeout: failed to run command 'claude': No such file or directory" 2>/dev/null + F="$STATE_DIR/0040-v.json" + hc="$(_read_fragment_field "$F" halt_cause)" + [ "$hc" = "gate-unobservable" ] && ok "verify-runtime no-verdict → halt_cause=gate-unobservable" || bad "halt_cause should be gate-unobservable (got '$hc')" + stt="$(sed -n 's/.*"status":"\([^"]*\)".*/\1/p' "$F" | head -1)" + [ "$stt" = "blocked" ] && ok "verify no-verdict ends at status=blocked (resumable)" || bad "status should be blocked (got '$stt')" + det="$(_read_fragment_field "$F" halt_cause_detail)" + printf '%s' "$det" | grep -q 'verify-runtime' \ + && ok "halt_cause_detail names the gate (verify-runtime)" || bad "detail should name verify-runtime (got '$det')" +) || true + +# §5: observed REVIEW_RESULT: BLOCK is UNTOUCHED — the discriminator is +# verdict-presence, not exit code. A review that emits BLOCK + a halting finding +# drives the bounded-rework path (converges here), NOT a gate-unobservable +# reclassification. Even when the subprocess ALSO exits non-zero, the verdict wins. 
+echo "[§5] observed REVIEW_RESULT: BLOCK drives bounded rework, NOT gate-unobservable (verdict wins, even on rc!=0)" +( D="$ROOT/s5"; mkdir -p "$D/state.d" + export STATE_DIR="$D/state.d" STATE_STARTED_AT=1000 STATE_MODE="sequential" + export INTEGRATION="master" CHANGE="ci" LOGDIR="$D" MAINREPO="$D/repo" + TDDS=() + THROUGHLINE_SOURCE_ONLY=1 source "$IMPL" || { bad "source guard missing"; exit 0; } + tgr_setup_review_repo "$D/repo" || { bad "setup failed"; exit 0; } + BS="$(git rev-parse HEAD)"; tgr_build_output + # Observed BLOCK + a non-structural major finding AND a non-zero exit — the + # verdict must still win (not reclassified gate-unobservable). + cat > "$D/repo/ctl/review.out" <<'EOF' +FINDING_BEGIN +severity: major +structural: false +region: src/a.txt:1-1 +region_lines: 8 +pattern_tags: [in-scope-fix] +summary: tighten the in-scope block +evidence: src/a.txt:1 needs a fix +FINDING_END +REVIEW_RESULT: BLOCK in-scope finding +EOF + printf '1\n' > "$D/repo/ctl/review.rc" # verdict present AND rc!=0 → verdict wins + cat > "$D/repo/ctl/do_rework" < src/a.txt +git add -A >/dev/null 2>&1; git commit -q -m "rework: in-scope fix to src/a.txt" >/dev/null 2>&1 +printf 'FILE_REVIEWED_NO_FINDINGS: src/a.txt\nREVIEW_RESULT: PASS\n' > "$D/repo/ctl/review.out" +printf '0\n' > "$D/repo/ctl/review.rc" +EOF + : > "$D/s5.log" + st="$(gate_one docs/tdd/0099-fix.md "$BS" "$D/s5.log")"; rc=$? 
+ F="$STATE_DIR/0099-fix.json" + hc="$(_read_fragment_field "$F" halt_cause)" + [ "$hc" != "gate-unobservable" ] \ + && ok "an observed BLOCK is NOT reclassified gate-unobservable (got halt_cause='$hc')" || bad "observed BLOCK must not become gate-unobservable" + grep -q '"outcome":"shipped"' "$F" 2>/dev/null \ + && ok "the observed BLOCK drove a bounded rework attempt (verdict-presence discriminator)" || bad "observed BLOCK should drive rework (rework_log: $(_read_fragment_raw_array "$F" rework_log))" +) || true + # =========================================================================== # §6: enum membership + status.sh render (Component 3). set_halt_cause # gate-unobservable returns 0 and writes the cause; the first next-action From abd4721aa120d4ce7e2aca94fbdc7d60461ec475 Mon Sep 17 00:00:00 2001 From: Chris Henesy Date: Tue, 9 Jun 2026 10:17:20 -0400 Subject: [PATCH 11/21] =?UTF-8?q?step(3):=20no-verdict=20review=20subproce?= =?UTF-8?q?ss=20=E2=86=92=20resumable=20gate-unobservable=20(TDD=200040=20?= =?UTF-8?q?Component=202,=20FR-57/NFR-4/ADR=200006)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the gate-agnostic _classify_gate_no_verdict helper (set_halt_cause gate-unobservable + _terminal_state blocked, in that order) and _gate_output_tail (stderr/output-tail detail), and rewire both no-verdict paths in _rework_loop (the rc!=0-no-fresh-verdict path and the neither-PASS-nor-BLOCK crash guard) from the old terminal 'failed' to the resumable gate-unobservable halt. The discriminator is verdict-presence, never exit code — an observed BLOCK/PASS is untouched. The verify-runtime call site (gate_one in lib/resume.sh, outside this TDD's declared ## Touched files) reuses the same gate-agnostic classifier. 
Co-Authored-By: Claude Opus 4.8 --- scripts/lib/gates.sh | 64 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 55 insertions(+), 9 deletions(-) diff --git a/scripts/lib/gates.sh b/scripts/lib/gates.sh index 9c48bb9..ee86252 100644 --- a/scripts/lib/gates.sh +++ b/scripts/lib/gates.sh @@ -1806,6 +1806,47 @@ _rework_escalate() { # &2 } +# _gate_output_tail <log> <pre_log_size> — TDD 0040 §2. Echo the last non-blank +# line of the gate log slice produced AFTER <pre_log_size> bytes (the subprocess +# output for THIS pass), stripped of control chars and clipped, for use as the +# couldn't-observe `halt_cause_detail` (e.g. a `timeout … No such file` exec +# error). Read-once (FR-74 #6) over a fixed byte offset; fail-loud (#1) with a +# clear marker when the log is unreadable rather than silently emitting empty. +_gate_output_tail() { # <log> <pre_log_size> + local log="$1" pre="${2:-0}" + [ -r "$log" ] || { printf '(gate log unreadable: %s)' "$log"; return 0; } + case "$pre" in ''|*[!0-9]*) pre=0 ;; esac + local tail_line + tail_line="$(tail -c +"$((pre + 1))" "$log" 2>/dev/null \ + | grep -avE '^[[:space:]]*$' \ + | tail -n 1 \ + | tr -d '\000-\010\013\014\016-\037' \ + | cut -c1-200)" + [ -n "$tail_line" ] && printf '%s' "$tail_line" || printf '(no output captured)' +} + +# _classify_gate_no_verdict <slug> <gate> <tail> — TDD 0040 §2 (FR-57, NFR-4, +# ADR 0006). A review/runtime-verify gate SUBPROCESS that exited leaving NO +# parseable verdict line is couldn't-observe, not observed-wrong: a missing +# verdict is the absence of an artifact, so it cannot BE a verdict. Record a +# *resumable* gate-unobservable blocked halt — NOT a terminal `failed` — so the +# gate is simply re-run on the next resume (Component 3 maps gate-unobservable to +# a resume-first action list, making the blocked fragment auto-resumable via +# _resume_from's blocked arm). <gate> names which gate (review|verify-runtime); +# the detail carries the gate + the captured stderr/output tail so the operator +# can see WHY the gate could not run. 
set_halt_cause FIRST then _terminal_state +# blocked (TDD 0040 §2 order): set_halt_cause preserves the current status while +# writing the halt fields, and _terminal_state blocked then carries those fields +# forward, so the fragment ends at status=blocked with the cause intact. This +# helper is gate-agnostic: the review gate (_rework_loop, below) drives it; the +# verify-runtime call site (gate_one in lib/resume.sh) reuses the SAME classifier. +_classify_gate_no_verdict() { # + local slug="$1" gate="$2" tail="$3" + set_halt_cause "$slug" gate-unobservable "$gate" "$gate gate emitted no parseable verdict: $tail" \ + || echo "warning: _classify_gate_no_verdict: set_halt_cause failed for $slug ($gate)" >&2 + _terminal_state "$slug" blocked "" "$gate gate produced no parseable verdict (couldn't observe; resumable, re-runs the gate)" +} + # _rework_loop — the bounded automatic rework loop # (FR-61, FR-62, FR-65, FR-66, FR-67). Runs the review gate; on a PASS verdict # returns 0 (converged). On a halting finding it either escalates (structural @@ -1827,7 +1868,7 @@ _rework_loop() { # # the loop's state writes. case "$step" in ''|*[!0-9]*) step=1 ;; esac local max="${THROUGHLINE_REWORK_MAX:-3}"; case "$max" in ''|*[!0-9]*) max=3 ;; esac - local build_start="$rbase" cleared attempts rrc rs _retries_json + local build_start="$rbase" cleared attempts rrc rs # §3c re-review state. Declared local so review_one sees REVIEW_ATTENTION_DIRECTIVE # via dynamic scope only while this loop runs; RFIND_RE_REVIEW_DIRECTIVE is set by # _per_file_coverage_check when coverage is incomplete. @@ -1865,21 +1906,26 @@ _rework_loop() { # # becomes a refinement of the fail path, not the only fail trigger. 
verdict_in_new="$(_fresh_review_verdict "$log" "$pre_log_size")" if [ "$rrc" -ne 0 ] && [ -z "$verdict_in_new" ]; then - _retries_json="$(_read_fragment_raw_array "${STATE_DIR:-}/$slug.json" retries 2>/dev/null)" - if [ -n "$_retries_json" ] && [ "$_retries_json" != "[]" ]; then - _terminal_state "$slug" failed "" "review gate fatal exit after retries (rc=$rrc; no fresh verdict)" - else - _terminal_state "$slug" failed "" "review gate fatal exit, no retries recorded and no fresh verdict (rc=$rrc)" - fi + # TDD 0040 §2 (FR-57, NFR-4, ADR 0006): the review subprocess exited + # leaving NO parseable REVIEW_RESULT line — couldn't-observe, NOT + # observed-wrong. Record a resumable gate-unobservable blocked halt with the + # captured output tail as detail, instead of the old terminal `failed`. The + # discriminator is verdict-presence (a mechanical check on the output), + # never the exit code; the retries-recorded distinction folds into the tail. + _classify_gate_no_verdict "$slug" review "$(_gate_output_tail "$log" "$pre_log_size") (rc=$rrc)" return 1 fi # Prefer the fresh-pass verdict over the cumulative log tail; review_status # is the legacy fallback for callers that didn't snapshot pre_log_size. rs="${verdict_in_new:-$(review_status "$log")}" - # Crash guard: a pass that produced neither verdict is a fatal/garbled run. + # Crash guard: a pass that produced neither verdict is couldn't-observe — a + # garbled/empty run (rc may even be 0). TDD 0040 §2: a malformed/absent + # verdict resolves to gate-unobservable (couldn't-observe), never a guessed + # PASS/FAIL (NFR-4), so the gate is re-run rather than recorded as a false + # terminal verdict. 
case "$rs" in *PASS*|*BLOCK*) : ;; - *) _terminal_state "$slug" failed "" "review: no REVIEW_RESULT line"; return 1 ;; + *) _classify_gate_no_verdict "$slug" review "$(_gate_output_tail "$log" "$pre_log_size")"; return 1 ;; esac # TDD 0021 §2/§4 (FR-58): record this pass's findings onto findings[] and # drive the halt boundary off the {blocker,major} subset — NOT the From e3c309951319246d5f9751891c9bb53fbfbd6daf Mon Sep 17 00:00:00 2001 From: Chris Henesy Date: Tue, 9 Jun 2026 10:36:37 -0400 Subject: [PATCH 12/21] =?UTF-8?q?fix(step=203):=20correct=20comment-vs-cod?= =?UTF-8?q?e=20=E2=80=94=20verify-runtime=20call=20site=20is=20NOT=20rewir?= =?UTF-8?q?ed?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The _classify_gate_no_verdict docstring and the §4/header eval comments claimed (present tense) the verify-runtime call site reuses the classifier. It does not: resume.sh is outside this TDD's ## Touched files and its no-verdict path still records terminal 'failed'. Restate accurately: the helper is gate-AGNOSTIC and ready, but only the review gate drives it in this TDD; wiring the verify-runtime call site is a follow-up within resume.sh's scope (ADR 0006 honest comments). Co-Authored-By: Claude Opus 4.8 --- scripts/lib/gates.sh | 9 ++++++--- tests/transient-gate-resilience.test.sh | 17 ++++++++++------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/scripts/lib/gates.sh b/scripts/lib/gates.sh index ee86252..3e999c5 100644 --- a/scripts/lib/gates.sh +++ b/scripts/lib/gates.sh @@ -1837,9 +1837,12 @@ _gate_output_tail() { # # can see WHY the gate could not run. set_halt_cause FIRST then _terminal_state # blocked (TDD 0040 §2 order): set_halt_cause preserves the current status while # writing the halt fields, and _terminal_state blocked then carries those fields -# forward, so the fragment ends at status=blocked with the cause intact. 
This -# helper is gate-agnostic: the review gate (_rework_loop, below) drives it; the -# verify-runtime call site (gate_one in lib/resume.sh) reuses the SAME classifier. +# forward, so the fragment ends at status=blocked with the cause intact. The +# helper is gate-AGNOSTIC by design (it takes <gate> as a parameter): in THIS TDD +# only the review gate (_rework_loop, below) drives it — the verify-runtime +# no-verdict path lives in gate_one in lib/resume.sh, which is OUTSIDE this TDD's +# declared ## Touched files and still records the old terminal `failed`; rewiring +# that one call site to this classifier is a follow-up within resume.sh's scope. _classify_gate_no_verdict() { # local slug="$1" gate="$2" tail="$3" set_halt_cause "$slug" gate-unobservable "$gate" "$gate gate emitted no parseable verdict: $tail" \ || echo "warning: _classify_gate_no_verdict: set_halt_cause failed for $slug ($gate)" >&2 diff --git a/tests/transient-gate-resilience.test.sh b/tests/transient-gate-resilience.test.sh index 86a3362..625354c 100644 --- a/tests/transient-gate-resilience.test.sh +++ b/tests/transient-gate-resilience.test.sh @@ -14,10 +14,11 @@ # (a resumable blocked halt) instead of a terminal `failed` — couldn't-observe # is not observed-wrong (ADR 0006 / NFR-4). The classification is a # gate-agnostic helper (_classify_gate_no_verdict) the review gate (_rework_loop -# in lib/gates.sh) drives; the verify-runtime gate's terminal-state write lives -# in lib/resume.sh's gate_one (OUTSIDE this TDD's declared ## Touched files), so -# §4 exercises the helper with gate=verify-runtime to pin the gate-agnostic -# classification the verify call site reuses. +# in lib/gates.sh) drives. The verify-runtime gate's terminal-state write lives +# in lib/resume.sh's gate_one (OUTSIDE this TDD's declared ## Touched files) and +# is NOT rewired here — it still records the old terminal `failed`; §4 exercises +# the helper with gate=verify-runtime to pin that the classification is +# gate-agnostic, so a follow-up can wire that call site to it. # # Component 3 — enum + render mirror. 
`gate-unobservable` is admitted by the # closed FR-63 halt-cause enum with a resume-first next-action list (state.sh) @@ -217,9 +218,11 @@ echo "[§3] review subprocess emits NO REVIEW_RESULT → gate-unobservable (resu # §4: verify no-verdict → gate-unobservable (gate=verify-runtime). The verify-gate # terminal-state write lives in lib/resume.sh's gate_one (OUTSIDE this TDD's -# declared ## Touched files), so this exercises the gate-agnostic classifier the -# verify call site reuses: _classify_gate_no_verdict verify-runtime -# must record the SAME resumable gate-unobservable halt with gate=verify-runtime. +# declared ## Touched files) and is NOT rewired here — it still records the old +# terminal `failed`. This pins that the classifier is gate-AGNOSTIC so a future +# follow-up can wire that call site to it: _classify_gate_no_verdict +# verify-runtime records the SAME resumable gate-unobservable halt with +# gate=verify-runtime that the review path produces. echo "[§4] verify-runtime no-verdict → gate-unobservable via the gate-agnostic helper (gate=verify-runtime)" ( D="$ROOT/s4"; mkdir -p "$D/state.d"; cd "$D" || { bad "cd failed"; exit 0; } export STATE_DIR="$D/state.d" STATE_STARTED_AT=1000 STATE_MODE="sequential" INTEGRATION="master" CHANGE="ci" LOGDIR="$D" From 573ef5a4b54203ee29efe31278f9627aeb50214f Mon Sep 17 00:00:00 2001 From: Chris Henesy Date: Tue, 9 Jun 2026 10:49:51 -0400 Subject: [PATCH 13/21] =?UTF-8?q?step(4):=20finalize=20the=20transient-gat?= =?UTF-8?q?e-resilience=20eval=20=E2=80=94=20TDD=20Failure-modes=20coverag?= =?UTF-8?q?e=20(TDD=200040=20=C2=A74)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the remaining ## Failure modes & edge cases assertions over behavior already delivered in steps 1-3 (no production-code change): double-flake bounded to FAIL (retry-once is not retry-until-green), RETRIES=2 raises the bound, and a malformed/truncated verdict resolves to gate-unobservable (NFR-4: ambiguity 
is couldn't-observe, never a guessed verdict). Pure test-coverage hardening — the production code is unchanged, so this step is legitimately no-new-behavior. Co-Authored-By: Claude Opus 4.8 --- tests/transient-gate-resilience.test.sh | 49 +++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/tests/transient-gate-resilience.test.sh b/tests/transient-gate-resilience.test.sh index 625354c..c4561a8 100644 --- a/tests/transient-gate-resilience.test.sh +++ b/tests/transient-gate-resilience.test.sh @@ -285,6 +285,55 @@ EOF && ok "the observed BLOCK drove a bounded rework attempt (verdict-presence discriminator)" || bad "observed BLOCK should drive rework (rework_log: $(_read_fragment_raw_array "$F" rework_log))" ) || true +# =========================================================================== +# §E: Failure-modes & edge cases from the TDD (coverage hardening for behavior +# already delivered in steps 1-3; no production change). +# §E1 double-flake (initial + retry both fail) → FAIL — retry-once is bounded, it +# is NOT a retry-until-green loop (which could mask a 50%-flaky real failure). +# §E2 RETRIES=2 raises the bound: a stub that passes only on the 3rd attempt +# recovers — the knob governs how many re-observations are allowed. +# §E3 a malformed/truncated review verdict (matches ^REVIEW_RESULT: but is neither +# PASS nor BLOCK) resolves to gate-unobservable (couldn't-observe), never a +# guessed PASS/FAIL (NFR-4: ambiguity resolves to couldn't-observe). 
+echo "[§E] Failure-modes: double-flake bounded to FAIL; RETRIES=2 raises the bound; malformed verdict → gate-unobservable" +( D="$ROOT/sE"; mkdir -p "$D"; cd "$D" || { bad "cd failed"; exit 0; } + export STATE_DIR="$D/state.d" STATE_STARTED_AT=1000 STATE_MODE="sequential" INTEGRATION="master" CHANGE="ci" LOGDIR="$D" + mkdir -p "$D/state.d"; TDDS=() + THROUGHLINE_SOURCE_ONLY=1 source "$IMPL" || { bad "source guard missing"; exit 0; } + # A stub that passes only on its Nth invocation (counter-keyed), N from $PASS_ON. + cnt="$D/ec.count" + cat > "$D/ci-passon.sh" </dev/null || echo 0) + 1 )); echo "\$n" > "$cnt" +[ "\$n" -ge "\${PASS_ON:-99}" ] && exit 0 || exit 1 +EOF + chmod +x "$D/ci-passon.sh"; export CI_CHECKS="$D/ci-passon.sh" + + # §E1: passes only on attempt 3, but RETRIES=1 allows only 2 attempts → FAIL. + printf '0\n' > "$cnt"; : > "$D/e1.log" + PASS_ON=3 THROUGHLINE_CI_CHECKS_RETRIES=1 run_ci_checks "$D/e1.log"; rc=$? + [ "$rc" -ne 0 ] && ok "double-flake under RETRIES=1 → FAIL (retry-once is bounded, not retry-until-green)" || bad "RETRIES=1 must not recover a 3rd-attempt pass (got rc=$rc)" + + # §E2: same stub, RETRIES=2 → 3 attempts allowed → recovers. + printf '0\n' > "$cnt"; : > "$D/e2.log" + PASS_ON=3 THROUGHLINE_CI_CHECKS_RETRIES=2 run_ci_checks "$D/e2.log"; rc2=$? + [ "$rc2" -eq 0 ] && ok "RETRIES=2 raises the bound (a 3rd-attempt pass recovers; the knob governs)" || bad "RETRIES=2 should allow 3 attempts (got rc=$rc2)" + + # §E3: malformed/truncated review verdict → gate-unobservable. + export INTEGRATION="master" CHANGE="ci" LOGDIR="$D" MAINREPO="$D/repo" + tgr_setup_review_repo "$D/repo" || { bad "setup failed"; exit 0; } + BS="$(git rev-parse HEAD)"; tgr_build_output + # Matches ^REVIEW_RESULT: but is neither PASS nor BLOCK — a truncated/garbled line. + printf 'REVIEW_RESULT: \n' > "$D/repo/ctl/review.out" + printf '0\n' > "$D/repo/ctl/review.rc" + : > "$D/e3.log" + st="$(gate_one docs/tdd/0099-fix.md "$BS" "$D/e3.log")"; rce=$? 
+ F="$STATE_DIR/0099-fix.json" + hc="$(_read_fragment_field "$F" halt_cause)" + [ "$hc" = "gate-unobservable" ] \ + && ok "a malformed/truncated verdict resolves to gate-unobservable (NFR-4: no guessed verdict)" || bad "malformed verdict should be gate-unobservable (got halt_cause='$hc', rc=$rce)" +) || true + # =========================================================================== # §6: enum membership + status.sh render (Component 3). set_halt_cause # gate-unobservable returns 0 and writes the cause; the first next-action From b52929f325cf6c2ac1e7c198b742d43faff9d247 Mon Sep 17 00:00:00 2001 From: Chris Henesy Date: Tue, 9 Jun 2026 11:06:10 -0400 Subject: [PATCH 14/21] fix(step 4): guard tgr_build_output + git rev-parse call sites (FR-74 #1 fail-loud) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A setup failure in tgr_build_output (or the preceding git rev-parse) would silently drop the downstream assertion with no bad() record. Guard all three call sites (§3, §5, §E) with || { bad ...; exit 0; } so a fixture failure is surfaced loud, not swallowed. 
Co-Authored-By: Claude Opus 4.8 --- tests/transient-gate-resilience.test.sh | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/transient-gate-resilience.test.sh b/tests/transient-gate-resilience.test.sh index c4561a8..9edc259 100644 --- a/tests/transient-gate-resilience.test.sh +++ b/tests/transient-gate-resilience.test.sh @@ -193,7 +193,8 @@ echo "[§3] review subprocess emits NO REVIEW_RESULT → gate-unobservable (resu TDDS=() THROUGHLINE_SOURCE_ONLY=1 source "$IMPL" || { bad "source guard missing"; exit 0; } tgr_setup_review_repo "$D/repo" || { bad "setup failed"; exit 0; } - BS="$(git rev-parse HEAD)"; tgr_build_output + BS="$(git rev-parse HEAD)" || { bad "git rev-parse failed"; exit 0; } + tgr_build_output || { bad "tgr_build_output failed"; exit 0; } # No-verdict review: emit the exec-error tail to stdout (captured to the gate # log by _claude_call) and exit non-zero, with NO REVIEW_RESULT line. printf "timeout: failed to run command 'claude': No such file or directory\n" > "$D/repo/ctl/review.out" @@ -253,7 +254,8 @@ echo "[§5] observed REVIEW_RESULT: BLOCK drives bounded rework, NOT gate-unobse TDDS=() THROUGHLINE_SOURCE_ONLY=1 source "$IMPL" || { bad "source guard missing"; exit 0; } tgr_setup_review_repo "$D/repo" || { bad "setup failed"; exit 0; } - BS="$(git rev-parse HEAD)"; tgr_build_output + BS="$(git rev-parse HEAD)" || { bad "git rev-parse failed"; exit 0; } + tgr_build_output || { bad "tgr_build_output failed"; exit 0; } # Observed BLOCK + a non-structural major finding AND a non-zero exit — the # verdict must still win (not reclassified gate-unobservable). cat > "$D/repo/ctl/review.out" <<'EOF' @@ -322,7 +324,8 @@ EOF # §E3: malformed/truncated review verdict → gate-unobservable. 
export INTEGRATION="master" CHANGE="ci" LOGDIR="$D" MAINREPO="$D/repo" tgr_setup_review_repo "$D/repo" || { bad "setup failed"; exit 0; } - BS="$(git rev-parse HEAD)"; tgr_build_output + BS="$(git rev-parse HEAD)" || { bad "git rev-parse failed"; exit 0; } + tgr_build_output || { bad "tgr_build_output failed"; exit 0; } # Matches ^REVIEW_RESULT: but is neither PASS nor BLOCK — a truncated/garbled line. printf 'REVIEW_RESULT: \n' > "$D/repo/ctl/review.out" printf '0\n' > "$D/repo/ctl/review.rc" From 05ae5f030ff4fbd8e73bccae502221eb58440bcb Mon Sep 17 00:00:00 2001 From: Chris Henesy Date: Tue, 9 Jun 2026 11:19:39 -0400 Subject: [PATCH 15/21] =?UTF-8?q?test(failing):=20aggregator=20must=20regi?= =?UTF-8?q?ster=20the=20transient-gate-resilience=20eval=20(TDD=200040=20?= =?UTF-8?q?=C2=A75,=20TDD=200038=20=C2=A73=20wire-in=20rule)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.8 --- tests/transient-gate-resilience.test.sh | 28 +++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/tests/transient-gate-resilience.test.sh b/tests/transient-gate-resilience.test.sh index 9edc259..f2a22eb 100644 --- a/tests/transient-gate-resilience.test.sh +++ b/tests/transient-gate-resilience.test.sh @@ -385,6 +385,34 @@ echo "[§6b] status.sh surfaces gate-unobservable as resumable=blocked with no u && ok "the gate-unobservable cause label appears in the halt render" || bad "render should name gate-unobservable (got: $out)" ) || true +# =========================================================================== +# §W: dogfood (TDD 0040 §5 / TDD 0038 §3 wire-in rule). Wiring this eval into the +# aggregator is NEW gating behavior — the registration must make the aggregator's +# exit go non-zero when THIS eval fails. Structural: the eval is referenced in +# tests/implement-gate.test.sh. 
Behavioral: the aggregator's real final AND-chain +# (extracted verbatim, driven against stub integers — no recursion) evaluates +# false when TGR_FAIL=1. Before the wire-in the chain never references TGR_FAIL, +# so it is true (RED); after, it includes [ "$TGR_FAIL" -eq 0 ] (GREEN). +echo "[§W] the eval is wired into the aggregator and propagates failure (TDD 0040 §5)" +( AGG="$REPO/tests/implement-gate.test.sh" + [ -r "$AGG" ] || { bad "INFRA: §W — aggregator unreadable: $AGG"; exit 0; } + grep -qE 'transient-gate-resilience\.test\.sh' "$AGG" \ + && ok "the eval is registered in the aggregator" || bad "implement-gate.test.sh should register transient-gate-resilience.test.sh" + chain="$(grep -aE '^\[ "\$FAIL" -eq 0 \] &&' "$AGG" | tail -1)" + [ -n "$chain" ] || { bad "INFRA: §W — could not locate the aggregator final AND-chain"; exit 0; } + drive_rc="$( + set +u + for v in $(printf '%s' "$chain" | grep -aoE '\$[A-Za-z_][A-Za-z0-9_]*' | tr -d '$' | sort -u); do + eval "$v=0" + done + TGR_FAIL=1 + eval "$chain"; echo $? 
+ )" + [ "$drive_rc" != "0" ] \ + && ok "aggregator final AND-chain goes non-zero when this eval fails (wire-in propagates)" \ + || bad "aggregator AND-chain must be non-zero with TGR_FAIL=1 (got rc=$drive_rc)" +) || true + # --- report ---------------------------------------------------------------- # Fail loud (FR-74 #1): the result tally is what makes every assertion above # enforceable — the final `[ "$FAIL" -eq 0 ]` sets the script's exit code, so a From 77a7eb811467243e80932779a3aa6ba0a0d8b89b Mon Sep 17 00:00:00 2001 From: Chris Henesy Date: Tue, 9 Jun 2026 11:20:36 -0400 Subject: [PATCH 16/21] =?UTF-8?q?step(5):=20wire=20transient-gate-resilien?= =?UTF-8?q?ce=20eval=20into=20the=20implement-gate=20aggregator=20(TDD=200?= =?UTF-8?q?040=20=C2=A75)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Register the eval (run it) and add [ "$TGR_FAIL" -eq 0 ] to the final AND-chain so ci-checks regression-gates Components 1-3. New gating behavior driven red→green by the eval's §W dogfood (05ae5f0) per the TDD 0038 §3 wire-in rule. 
Co-Authored-By: Claude Opus 4.8 --- tests/implement-gate.test.sh | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/tests/implement-gate.test.sh b/tests/implement-gate.test.sh index 4286a69..0903491 100755 --- a/tests/implement-gate.test.sh +++ b/tests/implement-gate.test.sh @@ -656,4 +656,20 @@ if [ -f "$TFP" ]; then bash "$TFP" || TFP_FAIL=1 fi -[ "$FAIL" -eq 0 ] && [ "$RPV_FAIL" -eq 0 ] && [ "$TSR_FAIL" -eq 0 ] && [ "$BTS_FAIL" -eq 0 ] && [ "$SMS_FAIL" -eq 0 ] && [ "$PRM_FAIL" -eq 0 ] && [ "$GRM_FAIL" -eq 0 ] && [ "$BRL_FAIL" -eq 0 ] && [ "$SCB_FAIL" -eq 0 ] && [ "$RR_FAIL" -eq 0 ] && [ "$BCL_FAIL" -eq 0 ] && [ "$BO_FAIL" -eq 0 ] && [ "$IDP_FAIL" -eq 0 ] && [ "$RES_FAIL" -eq 0 ] && [ "$CVR_FAIL" -eq 0 ] && [ "$HRS_FAIL" -eq 0 ] && [ "$SHR_FAIL" -eq 0 ] && [ "$BPL_FAIL" -eq 0 ] && [ "$BDN_FAIL" -eq 0 ] && [ "$IDISC_FAIL" -eq 0 ] && [ "$ERC_FAIL" -eq 0 ] && [ "$SCP_FAIL" -eq 0 ] && [ "$IMR_FAIL" -eq 0 ] && [ "$RVR_FAIL" -eq 0 ] && [ "$WIC_FAIL" -eq 0 ] && [ "$RTH_FAIL" -eq 0 ] && [ "$TFP_FAIL" -eq 0 ] +# Run the transient-gate-resilience eval (TDD 0040 / FR-15, FR-57, NFR-4; ADR +# 0004, 0006, 0007) as part of the same suite so the ci-checks retry-once loop +# (Component 1), the gate-unobservable no-verdict classification + the gate-agnostic +# _classify_gate_no_verdict / _gate_output_tail helpers (Component 2), and the +# closed-enum + status-render mirror for gate-unobservable (Component 3) are +# regression-gated by ci-checks, not orphaned from the aggregator. Per the +# TDD 0038 §3 wire-in rule this registration is new gating behavior — its failing +# wire-in test (the eval's §W dogfood) drove the AND-chain term below red→green +# before this block landed. 
+TGR="$(dirname "$0")/transient-gate-resilience.test.sh" +TGR_FAIL=0 +if [ -f "$TGR" ]; then + echo + bash "$TGR" || TGR_FAIL=1 +fi + +[ "$FAIL" -eq 0 ] && [ "$RPV_FAIL" -eq 0 ] && [ "$TSR_FAIL" -eq 0 ] && [ "$BTS_FAIL" -eq 0 ] && [ "$SMS_FAIL" -eq 0 ] && [ "$PRM_FAIL" -eq 0 ] && [ "$GRM_FAIL" -eq 0 ] && [ "$BRL_FAIL" -eq 0 ] && [ "$SCB_FAIL" -eq 0 ] && [ "$RR_FAIL" -eq 0 ] && [ "$BCL_FAIL" -eq 0 ] && [ "$BO_FAIL" -eq 0 ] && [ "$IDP_FAIL" -eq 0 ] && [ "$RES_FAIL" -eq 0 ] && [ "$CVR_FAIL" -eq 0 ] && [ "$HRS_FAIL" -eq 0 ] && [ "$SHR_FAIL" -eq 0 ] && [ "$BPL_FAIL" -eq 0 ] && [ "$BDN_FAIL" -eq 0 ] && [ "$IDISC_FAIL" -eq 0 ] && [ "$ERC_FAIL" -eq 0 ] && [ "$SCP_FAIL" -eq 0 ] && [ "$IMR_FAIL" -eq 0 ] && [ "$RVR_FAIL" -eq 0 ] && [ "$WIC_FAIL" -eq 0 ] && [ "$RTH_FAIL" -eq 0 ] && [ "$TFP_FAIL" -eq 0 ] && [ "$TGR_FAIL" -eq 0 ] From 0161f26646474e3d553f4dc871b8bd998d908cd1 Mon Sep 17 00:00:00 2001 From: Chris Henesy Date: Tue, 9 Jun 2026 11:34:44 -0400 Subject: [PATCH 17/21] =?UTF-8?q?chore:=20give=20=C2=A7E3=20captured=20gat?= =?UTF-8?q?e=5Fone=20output=20diagnostic=20use=20(clear=20SC2034)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The §E3 'st' capture of gate_one's output was unused (SC2034). Reference it in the failure diagnostic. The remaining TDDS=() (consumed by the sourced implement.sh via dynamic scope, required by the SOURCE_ONLY guard) and TGR_FAIL=1 (consumed inside eval "$chain" in the §W dogfood, mirroring TFP §8) are necessary idioms shellcheck-static cannot see — present in every sibling eval. 
Co-Authored-By: Claude Opus 4.8 --- tests/transient-gate-resilience.test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/transient-gate-resilience.test.sh b/tests/transient-gate-resilience.test.sh index f2a22eb..a32b01a 100644 --- a/tests/transient-gate-resilience.test.sh +++ b/tests/transient-gate-resilience.test.sh @@ -334,7 +334,7 @@ EOF F="$STATE_DIR/0099-fix.json" hc="$(_read_fragment_field "$F" halt_cause)" [ "$hc" = "gate-unobservable" ] \ - && ok "a malformed/truncated verdict resolves to gate-unobservable (NFR-4: no guessed verdict)" || bad "malformed verdict should be gate-unobservable (got halt_cause='$hc', rc=$rce)" + && ok "a malformed/truncated verdict resolves to gate-unobservable (NFR-4: no guessed verdict)" || bad "malformed verdict should be gate-unobservable (got halt_cause='$hc', rc=$rce, st='$st')" ) || true # =========================================================================== From 5843b35fa3dd4929f14164cb411ef93ecff95b81 Mon Sep 17 00:00:00 2001 From: Chris Henesy Date: Tue, 9 Jun 2026 15:32:56 -0400 Subject: [PATCH 18/21] =?UTF-8?q?test(failing):=20=C2=A77=20set=5Fhalt=5Fc?= =?UTF-8?q?ause=20write-failure=20=E2=86=92=20=5Fclassify=5Fgate=5Fno=5Fve?= =?UTF-8?q?rdict=20fails=20loud=20(TDD=200040=20=C2=A77)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- tests/transient-gate-resilience.test.sh | 38 ++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/tests/transient-gate-resilience.test.sh b/tests/transient-gate-resilience.test.sh index a32b01a..783cd9c 100644 --- a/tests/transient-gate-resilience.test.sh +++ b/tests/transient-gate-resilience.test.sh @@ -24,7 +24,7 @@ # closed FR-63 halt-cause enum with a resume-first next-action list (state.sh) # and rendered without an unknown-cause warning (status.sh). 
# -# Covers the TDD's Verification plan §1-§6, following the fixture pattern of +# Covers the TDD's Verification plan §1-§7, following the fixture pattern of # tests/runtime-verify-resume.test.sh (§5/§6 enum + render) and # tests/structural-classification-bound.test.sh (a stub `claude` review gate # driving the real gate_one + _rework_loop). Stubs mean no model or tokens are @@ -36,6 +36,7 @@ # §4 verify no-verdict → gate-unobservable (gate=verify-runtime) via the gate-agnostic helper # §5 observed REVIEW_RESULT: BLOCK is UNTOUCHED (discriminator is verdict-presence) # §6 enum membership + status.sh render (resumable=blocked; no unknown-cause warning) +# §7 set_halt_cause write-failure → _classify_gate_no_verdict fails loud (no stranded halt) # # Run: bash tests/transient-gate-resilience.test.sh set -uo pipefail @@ -385,6 +386,41 @@ echo "[§6b] status.sh surfaces gate-unobservable as resumable=blocked with no u && ok "the gate-unobservable cause label appears in the halt render" || bad "render should name gate-unobservable (got: $out)" ) || true +# =========================================================================== +# §7: set_halt_cause write-failure fails loud (no silent non-resumable halt). +# Simulate a set_halt_cause write failure by overriding it (after sourcing) to +# return non-zero with a diagnostic. _classify_gate_no_verdict must propagate +# the failure (return non-zero) — NOT silently leave a blocked fragment lacking +# halt_cause=gate-unobservable, which would strand it as non-resumable and +# defeat §2's auto-resumability guarantee (TDD 0040 failure-modes edge case). 
+echo "[§7] set_halt_cause write-failure → _classify_gate_no_verdict fails loud (no stranded halt)" +( D="$ROOT/s7"; mkdir -p "$D/state.d"; cd "$D" || { bad "cd failed"; exit 0; } + export STATE_DIR="$D/state.d" STATE_STARTED_AT=1000 STATE_MODE="sequential" INTEGRATION="master" CHANGE="ci" LOGDIR="$D" + TDDS=() + THROUGHLINE_SOURCE_ONLY=1 source "$IMPL" || { bad "source guard missing"; exit 0; } + _write_tdd_fragment 0040-w7 40 docs/tdd/0040-w7.md 1 reviewing review \ + 1000 1000 "feat/0040-w7" "" log "" "" "build,test-first,verify,verify-runtime" "" "" "" "" "" "" "" + F="$STATE_DIR/0040-w7.json" + [ -f "$F" ] || { bad "fixture fragment not created"; exit 0; } + # Override set_halt_cause after sourcing to simulate a write failure. + set_halt_cause() { echo "error: set_halt_cause: simulated write failure for §7" >&2; return 1; } + diag="$(_classify_gate_no_verdict 0040-w7 review "exec error tail" 2>&1)"; rc=$? + [ "$rc" -ne 0 ] && ok "_classify_gate_no_verdict returns non-zero when set_halt_cause fails" \ + || bad "_classify_gate_no_verdict must propagate write failure (got rc=$rc)" + printf '%s' "$diag" | grep -qiE 'set_halt_cause|write fail|error' \ + && ok "_classify_gate_no_verdict propagates a diagnostic on write failure" \ + || bad "write failure should surface a diagnostic (got: '$diag')" + # Fragment must NOT end at status=blocked without halt_cause=gate-unobservable — + # a half-classified blocked fragment is non-resumable and stranded (defeats §2). 
+ stt="$(sed -n 's/.*"status":"\([^"]*\)".*/\1/p' "$F" | head -1)" + hc="$(_read_fragment_field "$F" halt_cause)" + if [ "$stt" = "blocked" ] && [ "$hc" != "gate-unobservable" ]; then + bad "fragment must NOT be left status=blocked without halt_cause=gate-unobservable (stranded non-resumable)" + else + ok "fragment not left in a stranded non-resumable state (status='$stt' halt_cause='$hc')" + fi +) || true + # =========================================================================== # §W: dogfood (TDD 0040 §5 / TDD 0038 §3 wire-in rule). Wiring this eval into the # aggregator is NEW gating behavior — the registration must make the aggregator's From 6151afa63cc06b6eb78f37a579840fd36b5cbc6e Mon Sep 17 00:00:00 2001 From: Chris Henesy Date: Tue, 9 Jun 2026 15:33:15 -0400 Subject: [PATCH 19/21] =?UTF-8?q?fix:=20fail-loud=20on=20set=5Fhalt=5Fcaus?= =?UTF-8?q?e=20write=20failure=20in=20=5Fclassify=5Fgate=5Fno=5Fverdict=20?= =?UTF-8?q?(TDD=200040=20=C2=A77)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/lib/gates.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/lib/gates.sh b/scripts/lib/gates.sh index 3e999c5..ed9caab 100644 --- a/scripts/lib/gates.sh +++ b/scripts/lib/gates.sh @@ -1846,7 +1846,7 @@ _gate_output_tail() { # _classify_gate_no_verdict() { # local slug="$1" gate="$2" tail="$3" set_halt_cause "$slug" gate-unobservable "$gate" "$gate gate emitted no parseable verdict: $tail" \ - || echo "warning: _classify_gate_no_verdict: set_halt_cause failed for $slug ($gate)" >&2 + || return 1 _terminal_state "$slug" blocked "" "$gate gate produced no parseable verdict (couldn't observe; resumable, re-runs the gate)" } From a95ec2c1f7a40e2c4a4bd0d6b0cfe69fafae5435 Mon Sep 17 00:00:00 2001 From: Chris Henesy Date: Tue, 9 Jun 2026 15:53:48 -0400 Subject: [PATCH 20/21] mark 0040-transient-gate-failure-resilience implemented (verified + reviewed) --- 
docs/tdd/0040-transient-gate-failure-resilience.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tdd/0040-transient-gate-failure-resilience.md b/docs/tdd/0040-transient-gate-failure-resilience.md index 3f7eb09..b60e445 100644 --- a/docs/tdd/0040-transient-gate-failure-resilience.md +++ b/docs/tdd/0040-transient-gate-failure-resilience.md @@ -1,6 +1,6 @@ # TDD 0040: Transient gate-failure resilience — ci-checks retry-once + no-verdict is couldn't-observe, not failed -Status: draft +Status: implemented PRD refs: FR-15 (gap-closure); FR-57 (gap-closure); NFR-4 PRD-rev: d289607 ADR constraints: 0004, 0005, 0006, 0007 From 6b3f407eb38584c6dc82c64c7c7f643c55c318b2 Mon Sep 17 00:00:00 2001 From: Chris Henesy Date: Tue, 9 Jun 2026 16:02:08 -0400 Subject: [PATCH 21/21] chore: bump plugin 3.21.0 -> 3.22.0 (TDD 0040 transient gate-failure resilience) --- .claude-plugin/plugin.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 64e7a79..e528b67 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "throughline", - "version": "3.21.0", + "version": "3.22.0", "description": "Throughline plugin: a PRD/TDD/ADR design-doc pipeline with phase-gate PRs, layered as a thin governance overlay on top of the official superpowers/pr-review-toolkit plugins (it owns governance; they own discovery + engineering). /prd-author requires an observable acceptance criterion per new requirement. /tdd-author decides how many TDDs a PRD change needs (git-diff + coverage), recommends ADR actions, requires an alternatives analysis for every new dependency, requires a verification plan (observable surface → observation point(s) → expected observations) per TDD, reads BLOCKERS.md so implementation-time design blockers feed back into design, self-reviews, then runs an independent design-critique gate (different model, fresh context) before the design PR. 
/implement builds every TDD merged to the integration branch unattended in detached processes on the best model (opus) but does not trust self-reported success: the flip to implemented is gated on failing-test-first discipline, a mechanical ci-checks gate (tests+typecheck+lint — CI's job), a runtime-verify gate that drives the built artifact at its observable surface (PASS/FAIL/BLOCKED/SKIP kept distinct; mechanism delegated, no harness vendored — ADR 0004), and an independent review on a DIFFERENT model (sonnet) for reviewer diversity — one stacked PR per TDD, downstream halt-on-failure, never merges. Includes a toolchain bootstrap skill and a format+lint hook for JS/TS, Python, Rust, and Go.", "author": { "name": "Chris Henesy"