diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 64e7a79..e528b67 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "throughline", - "version": "3.21.0", + "version": "3.22.0", "description": "Throughline plugin: a PRD/TDD/ADR design-doc pipeline with phase-gate PRs, layered as a thin governance overlay on top of the official superpowers/pr-review-toolkit plugins (it owns governance; they own discovery + engineering). /prd-author requires an observable acceptance criterion per new requirement. /tdd-author decides how many TDDs a PRD change needs (git-diff + coverage), recommends ADR actions, requires an alternatives analysis for every new dependency, requires a verification plan (observable surface → observation point(s) → expected observations) per TDD, reads BLOCKERS.md so implementation-time design blockers feed back into design, self-reviews, then runs an independent design-critique gate (different model, fresh context) before the design PR. /implement builds every TDD merged to the integration branch unattended in detached processes on the best model (opus) but does not trust self-reported success: the flip to implemented is gated on failing-test-first discipline, a mechanical ci-checks gate (tests+typecheck+lint — CI's job), a runtime-verify gate that drives the built artifact at its observable surface (PASS/FAIL/BLOCKED/SKIP kept distinct; mechanism delegated, no harness vendored — ADR 0004), and an independent review on a DIFFERENT model (sonnet) for reviewer diversity — one stacked PR per TDD, downstream halt-on-failure, never merges. 
Includes a toolchain bootstrap skill and a format+lint hook for JS/TS, Python, Rust, and Go.", "author": { "name": "Chris Henesy" diff --git a/docs/tdd/0040-transient-gate-failure-resilience.md b/docs/tdd/0040-transient-gate-failure-resilience.md index 3f7eb09..b60e445 100644 --- a/docs/tdd/0040-transient-gate-failure-resilience.md +++ b/docs/tdd/0040-transient-gate-failure-resilience.md @@ -1,6 +1,6 @@ # TDD 0040: Transient gate-failure resilience — ci-checks retry-once + no-verdict is couldn't-observe, not failed -Status: draft +Status: implemented PRD refs: FR-15 (gap-closure); FR-57 (gap-closure); NFR-4 PRD-rev: d289607 ADR constraints: 0004, 0005, 0006, 0007 diff --git a/scripts/lib/gates.sh b/scripts/lib/gates.sh index 5c6dd0f..ed9caab 100644 --- a/scripts/lib/gates.sh +++ b/scripts/lib/gates.sh @@ -544,7 +544,47 @@ _fresh_review_verdict() { # | grep -aE '^[`[:space:]]*REVIEW_RESULT:' \ | tail -1 } -run_ci_checks() { bash "$CI_CHECKS" >>"$1" 2>&1; } +# run_ci_checks — TDD 0040 §1 (FR-15, NFR-4). Run ci-checks.sh; on a +# non-zero exit, re-run the checks up to THROUGHLINE_CI_CHECKS_RETRIES (default 1) +# more times in the SAME worktree (sequential, no parallelism). The FIRST passing +# run wins (PASS); only the initial run AND every retry failing is a real FAIL — +# so a transient suite flake is re-observed, never guessed past (ADR 0006), and a +# reproducible regression still FAILs. A pass on retry writes a +# "passed on retry N (initial run flaked)" telemetry line to the gate log so a +# recovered flake is visible, not silent; a retries-exhausted FAIL writes an +# equally explicit "FAILED after N attempt(s)" line (NFR-4: honest both ways). +# THROUGHLINE_CI_CHECKS_RETRIES=0 restores the no-retry behavior (an escape hatch +# for a deterministic-suite project); a non-numeric value defaults-and-warns +# (mirrors the THROUGHLINE_WATCH_MAX_SECS validation pattern). The signature is +# unchanged ( only) so the gate_one call site is untouched. 
run_ci_checks() { # <log>
  # Run "$CI_CHECKS" under bash, appending its stdout+stderr to <log>. A failing
  # run is repeated up to THROUGHLINE_CI_CHECKS_RETRIES more times (default 1,
  # 0 disables retrying); the first passing run wins.
  # Arguments: $1 - gate log file; check output and telemetry lines are appended.
  # Globals:   CI_CHECKS (read), THROUGHLINE_CI_CHECKS_RETRIES (read).
  # Outputs:   a warning on stderr when the retry knob is unusable.
  # Returns:   0 as soon as one run passes; 1 once the initial run and every
  #            retry have failed.
  local log="$1"
  # ${…:-1} already maps unset/empty to the default, so only a value holding a
  # non-digit needs rejecting here.
  local retries="${THROUGHLINE_CI_CHECKS_RETRIES:-1}"
  case "$retries" in
    *[!0-9]*)
      echo "warning: THROUGHLINE_CI_CHECKS_RETRIES='$retries' not numeric; falling back to 1" >&2
      retries=1 ;;
  esac
  # Drop leading zeros: `[` compares "08" as decimal 8, but printf '%d' rejects
  # it as invalid octal and would report "0 retries" in the telemetry below.
  retries="${retries#"${retries%%[!0]*}"}"
  retries="${retries:-0}"
  # `[` cannot compare integers beyond the shell's range; it exits 2, which the
  # exhaustion check below would read as "bound not reached" and loop forever
  # on a red suite. 18 digits always fits a 64-bit integer.
  if [ "${#retries}" -gt 18 ]; then
    echo "warning: THROUGHLINE_CI_CHECKS_RETRIES='$retries' out of range; falling back to 1" >&2
    retries=1
  fi
  local attempt=0
  while :; do
    if bash "$CI_CHECKS" >>"$log" 2>&1; then
      # Only a pass that needed a retry is announced; a first-try pass is silent.
      if [ "$attempt" -gt 0 ]; then
        printf 'ci-checks: passed on retry %d (initial run flaked; recovered, NFR-4)\n' "$attempt" >> "$log"
      fi
      return 0
    fi
    if [ "$attempt" -ge "$retries" ]; then
      # NFR-4 honesty: a retries-exhausted FAIL is recorded as explicitly as a
      # recovery, so a reader can tell it apart from a single-shot failure.
      printf 'ci-checks: FAILED after %d attempt(s) (initial + %d retries; retries exhausted, real FAIL)\n' \
        "$((attempt + 1))" "$retries" >> "$log"
      return 1
    fi
    attempt=$((attempt + 1))
    printf 'ci-checks: attempt failed; re-running (retry %d of %d, THROUGHLINE_CI_CHECKS_RETRIES)\n' \
      "$attempt" "$retries" >> "$log"
  done
}

# _test_first_ok_range — the SHARED test-first
# predicate (TDD 0038 §1 / FR-15a). Returns 0 iff `git log ..`
# contains a commit subject matching `^test(failing)` case-insensitively

# [diff hunk boundary: @@ -1766,6 +1806,50 @@ — context: tail of _rework_escalate]

# _gate_output_tail <log> <pre_log_size> — TDD 0040 §2. Echo the last non-blank
# line of the gate log slice produced AFTER <pre_log_size> bytes (the subprocess
# output for THIS pass), stripped of control chars and clipped, for use as the
# couldn't-observe `halt_cause_detail` (e.g. a `timeout … No such file` exec
# error). Read-once (FR-74 #6) over a fixed byte offset; fail-loud (#1) with a
# clear marker when the log is unreadable rather than silently emitting empty.
_gate_output_tail() { # <log> <pre_log_size>
  # Print, without a trailing newline, the last non-blank line that <log> holds
  # past byte offset <pre_log_size>, with control characters removed and the
  # result clipped to 200 characters.
  # Arguments: $1 - gate log path.
  #            $2 - number of leading bytes to skip (default 0; a non-numeric
  #                 value is treated as 0).
  # Outputs:   the line on stdout, or one of the markers
  #            "(gate log unreadable: <log>)" / "(no output captured)".
  # Returns:   0 on the unreadable-log path; otherwise the status of the final
  #            printf.
  local log="$1" pre="${2:-0}"
  [ -r "$log" ] || { printf '(gate log unreadable: %s)' "$log"; return 0; }
  case "$pre" in *[!0-9]*) pre=0 ;; esac
  # Force base 10: $(( )) would reject a zero-padded offset such as "08" as an
  # invalid octal constant.
  pre=$((10#$pre))
  local tail_line
  # Control characters (everything below space except TAB and LF, plus DEL) are
  # stripped BEFORE the blank-line filter, so a line made only of control bytes
  # counts as blank instead of being picked as "the last line" and then
  # collapsing to an empty result. The `|| :` keeps a no-match grep status from
  # failing the assignment when the caller runs with pipefail (and errexit).
  tail_line="$(tail -c +"$((pre + 1))" "$log" 2>/dev/null \
    | tr -d '\000-\010\013-\037\177' \
    | grep -avE '^[[:space:]]*$' \
    | tail -n 1 \
    | cut -c1-200)" || :
  if [ -n "$tail_line" ]; then
    printf '%s' "$tail_line"
  else
    printf '(no output captured)'
  fi
}

# _classify_gate_no_verdict <slug> <gate> <tail> — TDD 0040 §2 (FR-57, NFR-4,
# ADR 0006). A review/runtime-verify gate SUBPROCESS that exited leaving NO
# parseable verdict line is couldn't-observe, not observed-wrong: a missing
# verdict is the absence of an artifact, so it cannot BE a verdict. Record a
# *resumable* gate-unobservable blocked halt — NOT a terminal `failed` — so the
# gate is simply re-run on the next resume (Component 3 maps gate-unobservable to
# a resume-first action list, making the blocked fragment auto-resumable via
# _resume_from's blocked arm). <gate> names which gate (review|verify-runtime);
# the detail carries the gate + the captured stderr/output tail so the operator
# can see WHY the gate could not run. set_halt_cause FIRST then _terminal_state
# blocked (TDD 0040 §2 order): set_halt_cause preserves the current status while
# writing the halt fields, and _terminal_state blocked then carries those fields
# forward, so the fragment ends at status=blocked with the cause intact. The
# helper is gate-AGNOSTIC by design (it takes <gate> as a parameter): in THIS TDD
# only the review gate (_rework_loop, below) drives it — the verify-runtime
# no-verdict path lives in gate_one in lib/resume.sh, which is OUTSIDE this TDD's
# declared ## Touched files and still records the old terminal `failed`; rewiring
# that one call site to this classifier is a follow-up within resume.sh's scope.
+_classify_gate_no_verdict() { # + local slug="$1" gate="$2" tail="$3" + set_halt_cause "$slug" gate-unobservable "$gate" "$gate gate emitted no parseable verdict: $tail" \ + || return 1 + _terminal_state "$slug" blocked "" "$gate gate produced no parseable verdict (couldn't observe; resumable, re-runs the gate)" +} + # _rework_loop — the bounded automatic rework loop # (FR-61, FR-62, FR-65, FR-66, FR-67). Runs the review gate; on a PASS verdict # returns 0 (converged). On a halting finding it either escalates (structural @@ -1787,7 +1871,7 @@ _rework_loop() { # # the loop's state writes. case "$step" in ''|*[!0-9]*) step=1 ;; esac local max="${THROUGHLINE_REWORK_MAX:-3}"; case "$max" in ''|*[!0-9]*) max=3 ;; esac - local build_start="$rbase" cleared attempts rrc rs _retries_json + local build_start="$rbase" cleared attempts rrc rs # §3c re-review state. Declared local so review_one sees REVIEW_ATTENTION_DIRECTIVE # via dynamic scope only while this loop runs; RFIND_RE_REVIEW_DIRECTIVE is set by # _per_file_coverage_check when coverage is incomplete. @@ -1825,21 +1909,26 @@ _rework_loop() { # # becomes a refinement of the fail path, not the only fail trigger. verdict_in_new="$(_fresh_review_verdict "$log" "$pre_log_size")" if [ "$rrc" -ne 0 ] && [ -z "$verdict_in_new" ]; then - _retries_json="$(_read_fragment_raw_array "${STATE_DIR:-}/$slug.json" retries 2>/dev/null)" - if [ -n "$_retries_json" ] && [ "$_retries_json" != "[]" ]; then - _terminal_state "$slug" failed "" "review gate fatal exit after retries (rc=$rrc; no fresh verdict)" - else - _terminal_state "$slug" failed "" "review gate fatal exit, no retries recorded and no fresh verdict (rc=$rrc)" - fi + # TDD 0040 §2 (FR-57, NFR-4, ADR 0006): the review subprocess exited + # leaving NO parseable REVIEW_RESULT line — couldn't-observe, NOT + # observed-wrong. Record a resumable gate-unobservable blocked halt with the + # captured output tail as detail, instead of the old terminal `failed`. 
The + # discriminator is verdict-presence (a mechanical check on the output), + # never the exit code; the retries-recorded distinction folds into the tail. + _classify_gate_no_verdict "$slug" review "$(_gate_output_tail "$log" "$pre_log_size") (rc=$rrc)" return 1 fi # Prefer the fresh-pass verdict over the cumulative log tail; review_status # is the legacy fallback for callers that didn't snapshot pre_log_size. rs="${verdict_in_new:-$(review_status "$log")}" - # Crash guard: a pass that produced neither verdict is a fatal/garbled run. + # Crash guard: a pass that produced neither verdict is couldn't-observe — a + # garbled/empty run (rc may even be 0). TDD 0040 §2: a malformed/absent + # verdict resolves to gate-unobservable (couldn't-observe), never a guessed + # PASS/FAIL (NFR-4), so the gate is re-run rather than recorded as a false + # terminal verdict. case "$rs" in *PASS*|*BLOCK*) : ;; - *) _terminal_state "$slug" failed "" "review: no REVIEW_RESULT line"; return 1 ;; + *) _classify_gate_no_verdict "$slug" review "$(_gate_output_tail "$log" "$pre_log_size")"; return 1 ;; esac # TDD 0021 §2/§4 (FR-58): record this pass's findings onto findings[] and # drive the halt boundary off the {blocker,major} subset — NOT the diff --git a/scripts/lib/state.sh b/scripts/lib/state.sh index 6a7a4a6..c3c769d 100644 --- a/scripts/lib/state.sh +++ b/scripts/lib/state.sh @@ -935,6 +935,17 @@ _next_actions_for_cause() { # is revised + merged (the §3 verify-plan-unrevised guard enforces the # precondition). Mirrors structural-finding's resume-after-revision shape. 
echo "resume (re-run runtime-verify against the revised verification plan),revise the TDD's ## Verification plan via /tdd-author" ;; + gate-unobservable) + # TDD 0040 §3 (FR-57, NFR-4): a review/runtime-verify gate SUBPROCESS that + # exited leaving NO parseable verdict line (crash, exec failure, empty + # output) is couldn't-observe, not observed-wrong (ADR 0006: a missing + # verdict is the absence of an artifact, so it cannot BE a verdict). The + # FIRST element begins with `resume`, the machine-readable marker + # status.sh --check-paused and _resume_from's blocked arm key on. Unlike + # verify-unobservable (which needs a verification-plan revision first), a + # no-verdict gate is genuinely safe to re-run with no operator intent — the + # gate simply could not run — so the resume needs no revision precondition. + echo "resume (re-runs the gate),see the gate log for why the gate emitted no verdict" ;; design-escalation) echo "revise TDD via /tdd-author,/adr-new if a constraint is being challenged" ;; external-blocker) diff --git a/scripts/status.sh b/scripts/status.sh index 31f12c0..28f5d2c 100755 --- a/scripts/status.sh +++ b/scripts/status.sh @@ -200,6 +200,7 @@ _halt_cause_known() { # design-escalation|external-blocker) return 0 ;; resume-blocked-integration-conflict) return 0 ;; # TDD 0031 §3c (mirrors state.sh enum) verify-unobservable) return 0 ;; # TDD 0035 §1 (mirrors state.sh enum); FR-64 renders it without the unknown-cause warning + gate-unobservable) return 0 ;; # TDD 0040 §3 (mirrors state.sh enum); a no-verdict gate renders without the unknown-cause warning *) return 1 ;; esac } diff --git a/tests/implement-gate.test.sh b/tests/implement-gate.test.sh index 4286a69..0903491 100755 --- a/tests/implement-gate.test.sh +++ b/tests/implement-gate.test.sh @@ -656,4 +656,20 @@ if [ -f "$TFP" ]; then bash "$TFP" || TFP_FAIL=1 fi -[ "$FAIL" -eq 0 ] && [ "$RPV_FAIL" -eq 0 ] && [ "$TSR_FAIL" -eq 0 ] && [ "$BTS_FAIL" -eq 0 ] && [ "$SMS_FAIL" -eq 0 ] && [ 
"$PRM_FAIL" -eq 0 ] && [ "$GRM_FAIL" -eq 0 ] && [ "$BRL_FAIL" -eq 0 ] && [ "$SCB_FAIL" -eq 0 ] && [ "$RR_FAIL" -eq 0 ] && [ "$BCL_FAIL" -eq 0 ] && [ "$BO_FAIL" -eq 0 ] && [ "$IDP_FAIL" -eq 0 ] && [ "$RES_FAIL" -eq 0 ] && [ "$CVR_FAIL" -eq 0 ] && [ "$HRS_FAIL" -eq 0 ] && [ "$SHR_FAIL" -eq 0 ] && [ "$BPL_FAIL" -eq 0 ] && [ "$BDN_FAIL" -eq 0 ] && [ "$IDISC_FAIL" -eq 0 ] && [ "$ERC_FAIL" -eq 0 ] && [ "$SCP_FAIL" -eq 0 ] && [ "$IMR_FAIL" -eq 0 ] && [ "$RVR_FAIL" -eq 0 ] && [ "$WIC_FAIL" -eq 0 ] && [ "$RTH_FAIL" -eq 0 ] && [ "$TFP_FAIL" -eq 0 ] +# Run the transient-gate-resilience eval (TDD 0040 / FR-15, FR-57, NFR-4; ADR +# 0004, 0006, 0007) as part of the same suite so the ci-checks retry-once loop +# (Component 1), the gate-unobservable no-verdict classification + the gate-agnostic +# _classify_gate_no_verdict / _gate_output_tail helpers (Component 2), and the +# closed-enum + status-render mirror for gate-unobservable (Component 3) are +# regression-gated by ci-checks, not orphaned from the aggregator. Per the +# TDD 0038 §3 wire-in rule this registration is new gating behavior — its failing +# wire-in test (the eval's §W dogfood) drove the AND-chain term below red→green +# before this block landed. 
+TGR="$(dirname "$0")/transient-gate-resilience.test.sh" +TGR_FAIL=0 +if [ -f "$TGR" ]; then + echo + bash "$TGR" || TGR_FAIL=1 +fi + +[ "$FAIL" -eq 0 ] && [ "$RPV_FAIL" -eq 0 ] && [ "$TSR_FAIL" -eq 0 ] && [ "$BTS_FAIL" -eq 0 ] && [ "$SMS_FAIL" -eq 0 ] && [ "$PRM_FAIL" -eq 0 ] && [ "$GRM_FAIL" -eq 0 ] && [ "$BRL_FAIL" -eq 0 ] && [ "$SCB_FAIL" -eq 0 ] && [ "$RR_FAIL" -eq 0 ] && [ "$BCL_FAIL" -eq 0 ] && [ "$BO_FAIL" -eq 0 ] && [ "$IDP_FAIL" -eq 0 ] && [ "$RES_FAIL" -eq 0 ] && [ "$CVR_FAIL" -eq 0 ] && [ "$HRS_FAIL" -eq 0 ] && [ "$SHR_FAIL" -eq 0 ] && [ "$BPL_FAIL" -eq 0 ] && [ "$BDN_FAIL" -eq 0 ] && [ "$IDISC_FAIL" -eq 0 ] && [ "$ERC_FAIL" -eq 0 ] && [ "$SCP_FAIL" -eq 0 ] && [ "$IMR_FAIL" -eq 0 ] && [ "$RVR_FAIL" -eq 0 ] && [ "$WIC_FAIL" -eq 0 ] && [ "$RTH_FAIL" -eq 0 ] && [ "$TFP_FAIL" -eq 0 ] && [ "$TGR_FAIL" -eq 0 ] diff --git a/tests/transient-gate-resilience.test.sh b/tests/transient-gate-resilience.test.sh new file mode 100644 index 0000000..783cd9c --- /dev/null +++ b/tests/transient-gate-resilience.test.sh @@ -0,0 +1,462 @@ +#!/usr/bin/env bash +# transient-gate-resilience.test.sh — eval for TDD 0040 (transient gate-failure +# resilience). Two transient gate-failure modes are made honest and non-fatal: +# +# Component 1 — ci-checks retry-once. On a ci-checks failure the gate re-runs +# the checks up to THROUGHLINE_CI_CHECKS_RETRIES (default 1) more times before +# declaring FAIL; the FIRST passing run wins, a recovered flake is logged (not +# silent), and only the initial run AND all retries failing is a real FAIL. +# RETRIES=0 restores the no-retry behavior; a non-numeric value default-warns. +# +# Component 2 — no-verdict → gate-unobservable. A review/verify gate subprocess +# that exits leaving NO parseable verdict line (no REVIEW_RESULT: / no +# VERIFY_RUNTIME:), REGARDLESS of exit code, is classified `gate-unobservable` +# (a resumable blocked halt) instead of a terminal `failed` — couldn't-observe +# is not observed-wrong (ADR 0006 / NFR-4). 
The classification is a +# gate-agnostic helper (_classify_gate_no_verdict) the review gate (_rework_loop +# in lib/gates.sh) drives. The verify-runtime gate's terminal-state write lives +# in lib/resume.sh's gate_one (OUTSIDE this TDD's declared ## Touched files) and +# is NOT rewired here — it still records the old terminal `failed`; §4 exercises +# the helper with gate=verify-runtime to pin that the classification is +# gate-agnostic, so a follow-up can wire that call site to it. +# +# Component 3 — enum + render mirror. `gate-unobservable` is admitted by the +# closed FR-63 halt-cause enum with a resume-first next-action list (state.sh) +# and rendered without an unknown-cause warning (status.sh). +# +# Covers the TDD's Verification plan §1-§7, following the fixture pattern of +# tests/runtime-verify-resume.test.sh (§5/§6 enum + render) and +# tests/structural-classification-bound.test.sh (a stub `claude` review gate +# driving the real gate_one + _rework_loop). Stubs mean no model or tokens are +# needed; all subprocess exit codes + outputs are explicit fixtures. +# +# §1 ci-checks flaky-then-green → PASS (retry recovers; telemetry logged); RETRIES=0 → FAIL +# §2 ci-checks red-twice → real FAIL (no false PASS); RETRIES non-numeric → default-and-warn +# §3 review no-verdict → gate-unobservable (resumable), gate=review + stderr-tail detail +# §4 verify no-verdict → gate-unobservable (gate=verify-runtime) via the gate-agnostic helper +# §5 observed REVIEW_RESULT: BLOCK is UNTOUCHED (discriminator is verdict-presence) +# §6 enum membership + status.sh render (resumable=blocked; no unknown-cause warning) +# §7 set_halt_cause write-failure → _classify_gate_no_verdict fails loud (no stranded halt) +# +# Run: bash tests/transient-gate-resilience.test.sh +set -uo pipefail +REPO="$(cd "$(dirname "$0")/.." 
&& pwd)" +IMPL="$REPO/scripts/implement.sh" +RESULTS="$(mktemp)"; export RESULTS +ok() { printf 'ok\n' >>"$RESULTS"; printf ' ok — %s\n' "$1"; } +bad() { printf 'fail\n' >>"$RESULTS"; printf ' FAIL — %s\n' "$1"; } + +ROOT="$(mktemp -d)"; trap 'rm -rf "$ROOT"' EXIT + +# =========================================================================== +# §1: ci-checks flaky-then-green → PASS. A stub ci-checks.sh fails on its first +# invocation and passes on the second (keyed off a counter file). With +# THROUGHLINE_CI_CHECKS_RETRIES=1 run_ci_checks PASSES and the gate log records a +# recovered-flake telemetry line; with RETRIES=0 the SAME stub FAILS (no retry). +echo "[§1] ci-checks flaky-then-green → PASS with retry; RETRIES=0 → FAIL (knob governs)" +( D="$ROOT/s1"; mkdir -p "$D"; cd "$D" || { bad "cd failed"; exit 0; } + export STATE_DIR="$D/state.d" STATE_STARTED_AT=1000 STATE_MODE="sequential" INTEGRATION="master" CHANGE="ci" LOGDIR="$D" + mkdir -p "$D/state.d"; TDDS=() + THROUGHLINE_SOURCE_ONLY=1 source "$IMPL" || { bad "source guard missing"; exit 0; } + # Stub ci-checks: fail on attempt 1, pass on attempt 2+ (counter file). The + # counter is reset per scenario so each run starts on the flaky first attempt. + cnt="$D/ci.count" + cat > "$D/ci-checks-stub.sh" </dev/null || echo 0) + 1 )); echo "\$n" > "$cnt" +echo "ci-checks stub invocation \$n" +[ "\$n" -ge 2 ] && exit 0 || exit 1 +EOF + chmod +x "$D/ci-checks-stub.sh" + export CI_CHECKS="$D/ci-checks-stub.sh" + + # RETRIES=1: initial run flakes, retry passes → PASS. + printf '0\n' > "$cnt"; : > "$D/r1.log" + THROUGHLINE_CI_CHECKS_RETRIES=1 run_ci_checks "$D/r1.log"; rc=$? 
+ [ "$rc" -eq 0 ] && ok "flaky-then-green run_ci_checks returns 0 with RETRIES=1" || bad "retry should recover the flake (got rc=$rc)" + grep -qiE 'passed on retry' "$D/r1.log" \ + && ok "the gate log records a recovered-flake telemetry line" || bad "recovered flake must be logged, not silent (NFR-4); log: $(cat "$D/r1.log")" + + # RETRIES=0: no retry — the same flaky stub FAILS on its single attempt. + printf '0\n' > "$cnt"; : > "$D/r0.log" + THROUGHLINE_CI_CHECKS_RETRIES=0 run_ci_checks "$D/r0.log"; rc0=$? + [ "$rc0" -ne 0 ] && ok "RETRIES=0 disables the retry (the knob governs it)" || bad "RETRIES=0 should NOT retry (got rc=$rc0)" +) || true + +# =========================================================================== +# §2: ci-checks red-twice → real FAIL (no false PASS). A stub that fails on EVERY +# invocation must FAIL even with the retry — retry only re-observes a one-off, it +# never masks a reproducible failure (NFR-4). A non-numeric RETRIES default-warns. +echo "[§2] ci-checks red-twice → real FAIL (retry never masks a reproducible failure); RETRIES non-numeric → default-and-warn" +( D="$ROOT/s2"; mkdir -p "$D"; cd "$D" || { bad "cd failed"; exit 0; } + export STATE_DIR="$D/state.d" STATE_STARTED_AT=1000 STATE_MODE="sequential" INTEGRATION="master" CHANGE="ci" LOGDIR="$D" + mkdir -p "$D/state.d"; TDDS=() + THROUGHLINE_SOURCE_ONLY=1 source "$IMPL" || { bad "source guard missing"; exit 0; } + # Stub that ALWAYS fails (a reproducible regression). + cat > "$D/ci-red.sh" <<'EOF' +#!/usr/bin/env bash +echo "ci-checks red (reproducible failure)"; exit 1 +EOF + chmod +x "$D/ci-red.sh" + export CI_CHECKS="$D/ci-red.sh" + + : > "$D/red.log" + THROUGHLINE_CI_CHECKS_RETRIES=1 run_ci_checks "$D/red.log"; rc=$? + [ "$rc" -ne 0 ] && ok "red-twice run_ci_checks returns non-zero (real FAIL, no false PASS)" || bad "a reproducible failure must FAIL even with retry (got rc=$rc)" + ! 
grep -qiE 'passed on retry' "$D/red.log" \ + && ok "no recovered-flake telemetry on a genuine FAIL" || bad "must NOT log a recovered flake when it really failed" + # NFR-4 honesty: a real FAIL must be as visible in the gate log as a recovery — + # record an explicit "FAILED after N attempt(s)" line on retry exhaustion so a + # reader can tell a retries-exhausted FAIL from a single-shot one. + grep -qiE 'ci-checks: FAILED after [0-9]+ attempt' "$D/red.log" \ + && ok "retry exhaustion logs an explicit FAILED-after-N telemetry line" || bad "a retries-exhausted FAIL must be logged (NFR-4); log: $(cat "$D/red.log")" + + # Non-numeric RETRIES → default-and-warn (still bounded; mirrors WATCH_MAX_SECS). + printf '0\n' > "$D/c2" + cat > "$D/ci-flaky2.sh" </dev/null || echo 0) + 1 )); echo "\$n" > "$D/c2" +[ "\$n" -ge 2 ] && exit 0 || exit 1 +EOF + chmod +x "$D/ci-flaky2.sh"; export CI_CHECKS="$D/ci-flaky2.sh" + : > "$D/warn.log" + warn="$(THROUGHLINE_CI_CHECKS_RETRIES=abc run_ci_checks "$D/warn.log" 2>&1 >/dev/null)"; rcw=$? + [ "$rcw" -eq 0 ] && ok "non-numeric RETRIES defaults to 1 (flaky-then-green still recovers)" || bad "non-numeric RETRIES should default to 1 and retry (got rc=$rcw)" + printf '%s' "$warn" | grep -qiE 'not numeric|falling back' \ + && ok "non-numeric RETRIES emits a default-and-warn diagnostic" || bad "non-numeric RETRIES should warn (got: '$warn')" +) || true + +# =========================================================================== +# tgr_setup_review_repo — git repo + scope-declaring TDD + a state fragment +# + a stub `claude` acting as BOTH the review gate (cats $CTL/review.out, exits +# $CTL/review.rc) and the rework model (runs $CTL/do_rework). Gates 1-3 are marked +# done so gate_one runs ONLY the review gate + its bounded rework loop. Mirrors +# tests/structural-classification-bound.test.sh::scb_setup_repo. Leaves PWD in the +# repo. The caller exports STATE_DIR etc. + sources $IMPL first. 
+tgr_setup_review_repo() { # + local d="$1"; mkdir -p "$d/ctl" "$d/bin" + cd "$d" || return 1 + git init -q -b master; git config user.email t@t.t; git config user.name t + mkdir -p src docs/tdd + printf 'ctl/\nbin/\n' > .gitignore + printf 'orig\n' > src/a.txt + cat > docs/tdd/0099-fix.md <<'EOF' +# TDD 0099: fixture +Status: draft +PRD refs: 1 + +## Touched files +- `src/a.txt` — the in-scope file + +## Expected diff size +- `src/a.txt` — ~50 lines added +EOF + git add -A; git commit -qm "build start" >/dev/null + printf '0\n' > "$d/ctl/review.rc" # default: review subprocess exits 0 + cat > "$d/bin/claude" </dev/null + exit "\$(cat "$d/ctl/review.rc" 2>/dev/null || echo 0)" +fi +echo "BATCH_RESULT: OK"; exit 0 +EOF + chmod +x "$d/bin/claude" + export PATH="$d/bin:$PATH" + export RTMPL="$REPO/scripts/review-prompt.md" RWTMPL="$REPO/scripts/rework-prompt.md" + export REVIEW_MODEL="" REBUILD=0 BASE=master + export THROUGHLINE_GATE_RETRIES=1 THROUGHLINE_GATE_BACKOFF_BASE=0 + export THROUGHLINE_REQUIRE_TEST_FIRST=0 THROUGHLINE_REQUIRE_RUNTIME_VERIFY=0 + RESUME_GATES_DONE_0099_fix="build,test-first,verify,verify-runtime" + export RESUME_GATES_DONE_0099_fix + _write_tdd_fragment 0099-fix 99 docs/tdd/0099-fix.md 1 reviewing review \ + 1000 1000 "feat/0099-fix" "" "log" "" "" "build,test-first,verify,verify-runtime" "" "" "" "" "" "" "" +} +# Commit build output past build-start so the consolidated review scope is +# non-empty (the empty-scope guard fails closed on a HEAD..HEAD scope). +tgr_build_output() { printf 'build-output\n' >> src/a.txt; git add -A; git commit -qm "build: simulated output" >/dev/null; } + +# §3: review no-verdict → gate-unobservable (resumable). The stub review +# subprocess exits rc=1 emitting NO REVIEW_RESULT: line plus a stderr line +# mimicking the `timeout … No such file` exec error. 
The fragment must record +# halt_cause=gate-unobservable (NOT failed/null), halt_cause_detail naming `review` +# + the stderr tail, status=blocked, and halt_next_actions beginning with resume — +# couldn't-observe, not observed-wrong (ADR 0006 / NFR-4). +echo "[§3] review subprocess emits NO REVIEW_RESULT → gate-unobservable (resumable, gate=review, stderr-tail detail)" +( D="$ROOT/s3"; mkdir -p "$D/state.d" + export STATE_DIR="$D/state.d" STATE_STARTED_AT=1000 STATE_MODE="sequential" + export INTEGRATION="master" CHANGE="ci" LOGDIR="$D" MAINREPO="$D/repo" + TDDS=() + THROUGHLINE_SOURCE_ONLY=1 source "$IMPL" || { bad "source guard missing"; exit 0; } + tgr_setup_review_repo "$D/repo" || { bad "setup failed"; exit 0; } + BS="$(git rev-parse HEAD)" || { bad "git rev-parse failed"; exit 0; } + tgr_build_output || { bad "tgr_build_output failed"; exit 0; } + # No-verdict review: emit the exec-error tail to stdout (captured to the gate + # log by _claude_call) and exit non-zero, with NO REVIEW_RESULT line. + printf "timeout: failed to run command 'claude': No such file or directory\n" > "$D/repo/ctl/review.out" + printf '1\n' > "$D/repo/ctl/review.rc" + : > "$D/s3.log" + st="$(gate_one docs/tdd/0099-fix.md "$BS" "$D/s3.log")"; rc=$? 
+ F="$STATE_DIR/0099-fix.json" + [ "$rc" -ne 0 ] && ok "gate_one does not clear on a no-verdict review" || bad "no-verdict review should not converge (rc=$rc)" + hc="$(_read_fragment_field "$F" halt_cause)" + [ "$hc" = "gate-unobservable" ] && ok "review no-verdict → halt_cause=gate-unobservable" || bad "halt_cause should be gate-unobservable, NOT failed/null (got '$hc')" + stt="$(sed -n 's/.*"status":"\([^"]*\)".*/\1/p' "$F" | head -1)" + [ "$stt" = "blocked" ] && ok "fragment ends at status=blocked (resumable), not failed" || bad "status should be blocked (got '$stt')" + det="$(_read_fragment_field "$F" halt_cause_detail)" + printf '%s' "$det" | grep -q 'review' \ + && ok "halt_cause_detail names the gate (review)" || bad "detail should name review (got '$det')" + printf '%s' "$det" | grep -qi 'No such file or directory' \ + && ok "halt_cause_detail carries the captured stderr tail" || bad "detail should carry the stderr tail (got '$det')" + acts="$(sed -n 's/.*\("halt_next_actions":\[[^]]*\]\).*/\1/p' "$F" | head -1)" + printf '%s' "$acts" | grep -qE '(\[|,)"resume' \ + && ok "halt_next_actions begins with a resume action (auto-resumable)" || bad "halt_next_actions should begin with resume (got '$acts')" +) || true + +# §4: verify no-verdict → gate-unobservable (gate=verify-runtime). The verify-gate +# terminal-state write lives in lib/resume.sh's gate_one (OUTSIDE this TDD's +# declared ## Touched files) and is NOT rewired here — it still records the old +# terminal `failed`. This pins that the classifier is gate-AGNOSTIC so a future +# follow-up can wire that call site to it: _classify_gate_no_verdict +# verify-runtime records the SAME resumable gate-unobservable halt with +# gate=verify-runtime that the review path produces. 
+echo "[§4] verify-runtime no-verdict → gate-unobservable via the gate-agnostic helper (gate=verify-runtime)" +( D="$ROOT/s4"; mkdir -p "$D/state.d"; cd "$D" || { bad "cd failed"; exit 0; } + export STATE_DIR="$D/state.d" STATE_STARTED_AT=1000 STATE_MODE="sequential" INTEGRATION="master" CHANGE="ci" LOGDIR="$D" + TDDS=() + THROUGHLINE_SOURCE_ONLY=1 source "$IMPL" || { bad "source guard missing"; exit 0; } + command -v _classify_gate_no_verdict >/dev/null 2>&1 \ + && ok "_classify_gate_no_verdict helper is defined (gate-agnostic)" || bad "_classify_gate_no_verdict should exist" + _write_tdd_fragment 0040-v 40 docs/tdd/0040-v.md 1 verifying verify-runtime 1000 1000 "feat/0040-v" "" log "" "" "build,test-first,verify" "" "" "" "" "" "" "" "" "" "" "" "" + _classify_gate_no_verdict 0040-v verify-runtime "timeout: failed to run command 'claude': No such file or directory" 2>/dev/null + F="$STATE_DIR/0040-v.json" + hc="$(_read_fragment_field "$F" halt_cause)" + [ "$hc" = "gate-unobservable" ] && ok "verify-runtime no-verdict → halt_cause=gate-unobservable" || bad "halt_cause should be gate-unobservable (got '$hc')" + stt="$(sed -n 's/.*"status":"\([^"]*\)".*/\1/p' "$F" | head -1)" + [ "$stt" = "blocked" ] && ok "verify no-verdict ends at status=blocked (resumable)" || bad "status should be blocked (got '$stt')" + det="$(_read_fragment_field "$F" halt_cause_detail)" + printf '%s' "$det" | grep -q 'verify-runtime' \ + && ok "halt_cause_detail names the gate (verify-runtime)" || bad "detail should name verify-runtime (got '$det')" +) || true + +# §5: observed REVIEW_RESULT: BLOCK is UNTOUCHED — the discriminator is +# verdict-presence, not exit code. A review that emits BLOCK + a halting finding +# drives the bounded-rework path (converges here), NOT a gate-unobservable +# reclassification. Even when the subprocess ALSO exits non-zero, the verdict wins. 
+echo "[§5] observed REVIEW_RESULT: BLOCK drives bounded rework, NOT gate-unobservable (verdict wins, even on rc!=0)"
+# Each section runs in its own ( … ) subshell followed by `|| true`, so the
+# exports and sourced functions stay local to the section and the subshell's
+# exit status is discarded; results are reported through ok/bad instead.
+( D="$ROOT/s5"; mkdir -p "$D/state.d"
+  export STATE_DIR="$D/state.d" STATE_STARTED_AT=1000 STATE_MODE="sequential"
+  export INTEGRATION="master" CHANGE="ci" LOGDIR="$D" MAINREPO="$D/repo"
+  TDDS=()
+  # On a setup failure the section records it with `bad` and leaves the subshell
+  # early; `exit 0` only ends this subshell, not the whole script.
+  THROUGHLINE_SOURCE_ONLY=1 source "$IMPL" || { bad "source guard missing"; exit 0; }
+  tgr_setup_review_repo "$D/repo" || { bad "setup failed"; exit 0; }
+  BS="$(git rev-parse HEAD)" || { bad "git rev-parse failed"; exit 0; }
+  tgr_build_output || { bad "tgr_build_output failed"; exit 0; }
+  # Observed BLOCK + a non-structural major finding AND a non-zero exit — the
+  # verdict must still win (not reclassified gate-unobservable).
+  cat > "$D/repo/ctl/review.out" <<'EOF'
+FINDING_BEGIN
+severity: major
+structural: false
+region: src/a.txt:1-1
+region_lines: 8
+pattern_tags: [in-scope-fix]
+summary: tighten the in-scope block
+evidence: src/a.txt:1 needs a fix
+FINDING_END
+REVIEW_RESULT: BLOCK in-scope finding
+EOF
+  printf '1\n' > "$D/repo/ctl/review.rc" # verdict present AND rc!=0 → verdict wins
+  # NOTE(review): the redirection on the next line looks truncated in this view —
+  # the lines after it end in an `EOF` terminator, which suggests a here-document
+  # opener (and the start of its first body line) was lost. Confirm against the
+  # original file before relying on this fixture.
+  cat > "$D/repo/ctl/do_rework" < src/a.txt
+git add -A >/dev/null 2>&1; git commit -q -m "rework: in-scope fix to src/a.txt" >/dev/null 2>&1
+printf 'FILE_REVIEWED_NO_FINDINGS: src/a.txt\nREVIEW_RESULT: PASS\n' > "$D/repo/ctl/review.out"
+printf '0\n' > "$D/repo/ctl/review.rc"
+EOF
+  : > "$D/s5.log"
+  # st and rc are captured here but not asserted on below; this section judges the
+  # outcome from the state fragment written for 0099-fix.
+  st="$(gate_one docs/tdd/0099-fix.md "$BS" "$D/s5.log")"; rc=$?
+  F="$STATE_DIR/0099-fix.json"
+  hc="$(_read_fragment_field "$F" halt_cause)"
+  [ "$hc" != "gate-unobservable" ] \
+    && ok "an observed BLOCK is NOT reclassified gate-unobservable (got halt_cause='$hc')" || bad "observed BLOCK must not become gate-unobservable"
+  grep -q '"outcome":"shipped"' "$F" 2>/dev/null \
+    && ok "the observed BLOCK drove a bounded rework attempt (verdict-presence discriminator)" || bad "observed BLOCK should drive rework (rework_log: $(_read_fragment_raw_array "$F" rework_log))"
+) || true
+
+# ===========================================================================
+# §E: Failure-modes & edge cases from the TDD (coverage hardening for behavior
+# already delivered in steps 1-3; no production change).
+# §E1 double-flake (initial + retry both fail) → FAIL — retry-once is bounded, it
+# is NOT a retry-until-green loop (which could mask a 50%-flaky real failure).
+# §E2 RETRIES=2 raises the bound: a stub that passes only on the 3rd attempt
+# recovers — the knob governs how many re-observations are allowed.
+# §E3 a malformed/truncated review verdict (matches ^REVIEW_RESULT: but is neither
+# PASS nor BLOCK) resolves to gate-unobservable (couldn't-observe), never a
+# guessed PASS/FAIL (NFR-4: ambiguity resolves to couldn't-observe).
+echo "[§E] Failure-modes: double-flake bounded to FAIL; RETRIES=2 raises the bound; malformed verdict → gate-unobservable"
+( D="$ROOT/sE"; mkdir -p "$D"; cd "$D" || { bad "cd failed"; exit 0; }
+  export STATE_DIR="$D/state.d" STATE_STARTED_AT=1000 STATE_MODE="sequential" INTEGRATION="master" CHANGE="ci" LOGDIR="$D"
+  mkdir -p "$D/state.d"; TDDS=()
+  THROUGHLINE_SOURCE_ONLY=1 source "$IMPL" || { bad "source guard missing"; exit 0; }
+  # A stub that passes only on its Nth invocation (counter-keyed), N from $PASS_ON.
+  # The invocation counter lives in the file named by $cnt; each sub-case below
+  # resets it to 0 before driving run_ci_checks.
+  cnt="$D/ec.count"
+  # NOTE(review): the redirection on the next line looks truncated in this view —
+  # an `EOF` terminator follows, so a here-document opener and the start of the
+  # stub's first lines appear to have been lost. Confirm against the original file.
+  cat > "$D/ci-passon.sh" </dev/null || echo 0) + 1 )); echo "\$n" > "$cnt"
+[ "\$n" -ge "\${PASS_ON:-99}" ] && exit 0 || exit 1
+EOF
+  # Point the gate at the stub: CI_CHECKS is the script run_ci_checks executes.
+  chmod +x "$D/ci-passon.sh"; export CI_CHECKS="$D/ci-passon.sh"
+
+  # §E1: passes only on attempt 3, but RETRIES=1 allows only 2 attempts → FAIL.
+  # PASS_ON and the retry knob are supplied as per-call environment prefixes, so
+  # each sub-case chooses its own values.
+  printf '0\n' > "$cnt"; : > "$D/e1.log"
+  PASS_ON=3 THROUGHLINE_CI_CHECKS_RETRIES=1 run_ci_checks "$D/e1.log"; rc=$?
+  [ "$rc" -ne 0 ] && ok "double-flake under RETRIES=1 → FAIL (retry-once is bounded, not retry-until-green)" || bad "RETRIES=1 must not recover a 3rd-attempt pass (got rc=$rc)"
+
+  # §E2: same stub, RETRIES=2 → 3 attempts allowed → recovers.
+  printf '0\n' > "$cnt"; : > "$D/e2.log"
+  PASS_ON=3 THROUGHLINE_CI_CHECKS_RETRIES=2 run_ci_checks "$D/e2.log"; rc2=$?
+  [ "$rc2" -eq 0 ] && ok "RETRIES=2 raises the bound (a 3rd-attempt pass recovers; the knob governs)" || bad "RETRIES=2 should allow 3 attempts (got rc=$rc2)"
+
+  # §E3: malformed/truncated review verdict → gate-unobservable.
+  # Re-exports the values already set at the top of this section and adds
+  # MAINREPO, which the review-repo fixture below uses.
+  export INTEGRATION="master" CHANGE="ci" LOGDIR="$D" MAINREPO="$D/repo"
+  tgr_setup_review_repo "$D/repo" || { bad "setup failed"; exit 0; }
+  BS="$(git rev-parse HEAD)" || { bad "git rev-parse failed"; exit 0; }
+  tgr_build_output || { bad "tgr_build_output failed"; exit 0; }
+  # Matches ^REVIEW_RESULT: but is neither PASS nor BLOCK — a truncated/garbled line.
+  printf 'REVIEW_RESULT: \n' > "$D/repo/ctl/review.out"
+  printf '0\n' > "$D/repo/ctl/review.rc"
+  : > "$D/e3.log"
+  # st and rce are only used in the failure diagnostic of the assertion below.
+  st="$(gate_one docs/tdd/0099-fix.md "$BS" "$D/e3.log")"; rce=$?
+  F="$STATE_DIR/0099-fix.json"
+  hc="$(_read_fragment_field "$F" halt_cause)"
+  [ "$hc" = "gate-unobservable" ] \
+    && ok "a malformed/truncated verdict resolves to gate-unobservable (NFR-4: no guessed verdict)" || bad "malformed verdict should be gate-unobservable (got halt_cause='$hc', rc=$rce, st='$st')"
+) || true
+
+# ===========================================================================
+# §6: enum membership + status.sh render (Component 3). set_halt_cause
+# gate-unobservable returns 0 and writes the cause; the first next-action
+# begins with `resume` (the resumable marker _resume_from + status.sh
+# --check-paused key on); a value NOT in the closed enum still returns 1 (the
+# addition is what admits gate-unobservable, not a wildcard). status.sh
+# --check-paused surfaces it resumable=blocked, and the full render emits no
+# unknown-cause warning.
+echo "[§6] gate-unobservable: closed-enum membership, resume-first action, status.sh render"
+( D="$ROOT/s6"; mkdir -p "$D/state.d"; cd "$D" || { bad "cd failed"; exit 0; }
+  export STATE_DIR="$D/state.d" STATE_STARTED_AT=1000 STATE_MODE="sequential" INTEGRATION="master" CHANGE="ci" LOGDIR="$D"
+  TDDS=()
+  THROUGHLINE_SOURCE_ONLY=1 source "$IMPL" || { bad "source guard missing"; exit 0; }
+  # First call checks only the exit status; the second captures the output so the
+  # `^resume` prefix can be asserted.
+  _next_actions_for_cause gate-unobservable >/dev/null 2>&1 \
+    && ok "_next_actions_for_cause admits gate-unobservable" || bad "gate-unobservable should be enumerated"
+  acts="$(_next_actions_for_cause gate-unobservable 2>/dev/null)"
+  printf '%s' "$acts" | grep -qE '^resume' \
+    && ok "gate-unobservable's first next-action begins with resume" || bad "first next-action must begin with resume (got '$acts')"
+  # The action labels must be comma-free per element so the CSV round-trips.
+  # NOTE(review): no assertion visible in this section checks comma-freeness —
+  # confirm whether one is intended here or lives elsewhere.
+  # The positional arguments of _write_tdd_fragment are defined in the sourced
+  # $IMPL (not shown here); this call creates the 0040-x fragment read back below.
+  _write_tdd_fragment 0040-x 40 docs/tdd/0040-x.md 1 blocked review 1000 1000 "feat/0040-x" "" log "" "" "build,test-first,verify,verify-runtime" "" "" "" "" "" "" "" "" "" "" "" ""
+  set_halt_cause 0040-x gate-unobservable review "timeout: failed to run command 'claude': No such file or directory" 2>/dev/null; rc=$?
+  [ "$rc" -eq 0 ] && ok "set_halt_cause gate-unobservable returns 0" || bad "set_halt_cause should accept gate-unobservable (got rc=$rc)"
+  hc="$(_read_fragment_field "$STATE_DIR/0040-x.json" halt_cause)"
+  [ "$hc" = "gate-unobservable" ] && ok "halt_cause written = gate-unobservable" || bad "halt_cause should be gate-unobservable (got '$hc')"
+  # Negative: an unknown cause still returns 1 (the enum is still closed).
+  set_halt_cause 0040-x not-a-real-cause-xyz review "" 2>/dev/null; rc2=$?
+  [ "$rc2" -ne 0 ] && ok "an unknown cause still returns non-zero (enum stays closed)" || bad "unknown cause must return non-zero"
+) || true
+
+echo "[§6b] status.sh surfaces gate-unobservable as resumable=blocked with no unknown-cause warning"
+( D="$ROOT/s6b"; mkdir -p "$D/state.d"; cd "$D" || { bad "cd failed"; exit 0; }
+  export STATE_DIR="$D/state.d" STATE_STARTED_AT=1000 STATE_MODE="sequential" INTEGRATION="master" CHANGE="ci" LOGDIR="$D"
+  TDDS=()
+  THROUGHLINE_SOURCE_ONLY=1 source "$IMPL" || { bad "source guard missing"; exit 0; }
+  # Hand-written run-level state file (one blocked TDD) placed next to the
+  # fragment so status.sh has a run to render from --logdir "$D".
+  printf '{"schema":1,"started_at":1000,"updated_at":1001,"pid":1,"state":"blocked","total":1,"completed":0,"failed":0,"blocked":1,"skipped":0,"paused":0}\n' > "$D/state.d/run.json"
+  _write_tdd_fragment 0040-x 40 docs/tdd/0040-x.md 1 blocked review 1000 1000 "feat/0040-x" "" log "" "" "build,test-first,verify,verify-runtime" "" "" "" "" "" "" "" "" "" "" "" ""
+  set_halt_cause 0040-x gate-unobservable review "timeout: No such file or directory" 2>/dev/null
+  # status.sh is run as a separate bash process with stderr merged into the
+  # captured text, so warnings are visible to the greps below.
+  cp="$(bash "$REPO/scripts/status.sh" --logdir "$D" --check-paused 2>&1)"
+  printf '%s' "$cp" | grep -qE 'slug=0040-x .*cause=gate-unobservable resumable=blocked' \
+    && ok "--check-paused surfaces cause=gate-unobservable resumable=blocked" || bad "should surface gate-unobservable resumable=blocked (got: '$cp')"
+  out="$(bash "$REPO/scripts/status.sh" --logdir "$D" 2>&1)"
+  # Inverted assertion: a match on the warning text is the failure case.
+  printf '%s' "$out" | grep -qi 'unknown halt_cause' \
+    && bad "status.sh must NOT warn unknown-cause for gate-unobservable (got: $out)" \
+    || ok "full render emits no unknown-cause fallback warning"
+  printf '%s' "$out" | grep -q 'gate-unobservable' \
+    && ok "the gate-unobservable cause label appears in the halt render" || bad "render should name gate-unobservable (got: $out)"
+) || true
+
+# ===========================================================================
+# §7: set_halt_cause write-failure fails loud (no silent non-resumable halt).
+# Simulate a set_halt_cause write failure by overriding it (after sourcing) to
+# return non-zero with a diagnostic. _classify_gate_no_verdict must propagate
+# the failure (return non-zero) — NOT silently leave a blocked fragment lacking
+# halt_cause=gate-unobservable, which would strand it as non-resumable and
+# defeat §2's auto-resumability guarantee (TDD 0040 failure-modes edge case).
+echo "[§7] set_halt_cause write-failure → _classify_gate_no_verdict fails loud (no stranded halt)"
+( D="$ROOT/s7"; mkdir -p "$D/state.d"; cd "$D" || { bad "cd failed"; exit 0; }
+  export STATE_DIR="$D/state.d" STATE_STARTED_AT=1000 STATE_MODE="sequential" INTEGRATION="master" CHANGE="ci" LOGDIR="$D"
+  TDDS=()
+  THROUGHLINE_SOURCE_ONLY=1 source "$IMPL" || { bad "source guard missing"; exit 0; }
+  # NOTE(review): this call passes fewer trailing empty arguments than the
+  # _write_tdd_fragment calls in §6/§6b — confirm the shorter form is intended.
+  _write_tdd_fragment 0040-w7 40 docs/tdd/0040-w7.md 1 reviewing review \
+    1000 1000 "feat/0040-w7" "" log "" "" "build,test-first,verify,verify-runtime" "" "" "" "" "" "" ""
+  F="$STATE_DIR/0040-w7.json"
+  [ -f "$F" ] || { bad "fixture fragment not created"; exit 0; }
+  # Override set_halt_cause after sourcing to simulate a write failure.
+  set_halt_cause() { echo "error: set_halt_cause: simulated write failure for §7" >&2; return 1; }
+  # stderr is merged into the capture so the diagnostic can be inspected; rc is
+  # the status of the command substitution, i.e. of _classify_gate_no_verdict.
+  diag="$(_classify_gate_no_verdict 0040-w7 review "exec error tail" 2>&1)"; rc=$?
+  [ "$rc" -ne 0 ] && ok "_classify_gate_no_verdict returns non-zero when set_halt_cause fails" \
+    || bad "_classify_gate_no_verdict must propagate write failure (got rc=$rc)"
+  printf '%s' "$diag" | grep -qiE 'set_halt_cause|write fail|error' \
+    && ok "_classify_gate_no_verdict propagates a diagnostic on write failure" \
+    || bad "write failure should surface a diagnostic (got: '$diag')"
+  # Fragment must NOT end at status=blocked without halt_cause=gate-unobservable —
+  # a half-classified blocked fragment is non-resumable and stranded (defeats §2).
+  # The status is pulled out with sed (first "status":"…" match in the fragment).
+  stt="$(sed -n 's/.*"status":"\([^"]*\)".*/\1/p' "$F" | head -1)"
+  hc="$(_read_fragment_field "$F" halt_cause)"
+  if [ "$stt" = "blocked" ] && [ "$hc" != "gate-unobservable" ]; then
+    bad "fragment must NOT be left status=blocked without halt_cause=gate-unobservable (stranded non-resumable)"
+  else
+    ok "fragment not left in a stranded non-resumable state (status='$stt' halt_cause='$hc')"
+  fi
+) || true
+
+# ===========================================================================
+# §W: dogfood (TDD 0040 §5 / TDD 0038 §3 wire-in rule). Wiring this eval into the
+# aggregator is NEW gating behavior — the registration must make the aggregator's
+# exit go non-zero when THIS eval fails. Structural: the eval is referenced in
+# tests/implement-gate.test.sh. Behavioral: the aggregator's real final AND-chain
+# (extracted verbatim, driven against stub integers — no recursion) evaluates
+# false when TGR_FAIL=1. Before the wire-in the chain never references TGR_FAIL,
+# so it is true (RED); after, it includes [ "$TGR_FAIL" -eq 0 ] (GREEN).
+echo "[§W] the eval is wired into the aggregator and propagates failure (TDD 0040 §5)"
+( AGG="$REPO/tests/implement-gate.test.sh"
+  [ -r "$AGG" ] || { bad "INFRA: §W — aggregator unreadable: $AGG"; exit 0; }
+  grep -qE 'transient-gate-resilience\.test\.sh' "$AGG" \
+    && ok "the eval is registered in the aggregator" || bad "implement-gate.test.sh should register transient-gate-resilience.test.sh"
+  # Take the last line of the aggregator that starts with `[ "$FAIL" -eq 0 ] &&`.
+  chain="$(grep -aE '^\[ "\$FAIL" -eq 0 \] &&' "$AGG" | tail -1)"
+  [ -n "$chain" ] || { bad "INFRA: §W — could not locate the aggregator final AND-chain"; exit 0; }
+  # Drive the extracted chain inside a command substitution so the stub variables
+  # do not leak: nounset is turned off, every $NAME referenced in the chain is
+  # seeded to 0, TGR_FAIL is forced to 1, and the chain's exit status is printed.
+  # The eval'd text comes from the repository's own aggregator file ($AGG).
+  drive_rc="$(
+    set +u
+    for v in $(printf '%s' "$chain" | grep -aoE '\$[A-Za-z_][A-Za-z0-9_]*' | tr -d '$' | sort -u); do
+      eval "$v=0"
+    done
+    TGR_FAIL=1
+    eval "$chain"; echo $?
+  )"
+  [ "$drive_rc" != "0" ] \
+    && ok "aggregator final AND-chain goes non-zero when this eval fails (wire-in propagates)" \
+    || bad "aggregator AND-chain must be non-zero with TGR_FAIL=1 (got rc=$drive_rc)"
+) || true
+
+# --- report ----------------------------------------------------------------
+# Fail loud (FR-74 #1): the result tally is what makes every assertion above
+# enforceable — the final `[ "$FAIL" -eq 0 ]` sets the script's exit code, so a
+# single `bad` makes `bash tests/transient-gate-resilience.test.sh` exit non-zero
+# and the aggregator's `|| TGR_FAIL=1` (step 5) catches it.
+# The counts are the number of `ok` / `fail` lines in $RESULTS; the `:-0`
+# fallback covers grep printing nothing.
+# NOTE(review): if $RESULTS is missing or unreadable, grep's error is discarded
+# and both counts fall back to 0, so the script would exit 0 with no assertions
+# counted — confirm whether that case should fail loud instead.
+echo
+PASS="$(grep -c '^ok$' "$RESULTS" 2>/dev/null)"; PASS="${PASS:-0}"
+FAIL="$(grep -c '^fail$' "$RESULTS" 2>/dev/null)"; FAIL="${FAIL:-0}"
+rm -f "$RESULTS"
+echo "=== transient-gate-resilience eval: $PASS passed, $FAIL failed ==="
+[ "$FAIL" -eq 0 ]