Skip to content
Merged
84 changes: 84 additions & 0 deletions .claude/hooks/lib/gaia-active-plan.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/usr/bin/env bash
# Shared, sourced resolver for the plan folder and feature key backing this
# branch's active KICKOFF execution. Hooks source this to key a side effect
# (a tally record, a roll-up render) to the right plan without depending on
# session-scoped state. Pure and side-effect-free; every function always
# returns 0, even when nothing resolves or the repo has no plans directory
# at all.
#
# Usage:
# . .claude/hooks/lib/gaia-active-plan.sh
# plan_dir="$(resolve_active_plan_dir)"
# [ -n "$plan_dir" ] && feature_key="$(resolve_feature_key "$plan_dir")"

# Prints the first whitespace-delimited token of the value on the first
# `<key>:` line of a file, or nothing when the key is absent or its value is
# empty. Any run of spaces or tabs after the colon (including none) is
# accepted, and a trailing carriage return is dropped, so a sentinel written
# with different spacing or CRLF line endings still parses. Always returns 0.
#   $1 - key name without the colon (e.g. branch)
#   $2 - path of the file to read
_gaia_running_field() {
  awk -v key="$1" '
    index($0, key ":") == 1 {
      # Re-assigning $0 re-splits the fields, so $1 is the first value token.
      $0 = substr($0, length(key) + 2)
      sub(/\r$/, "")
      print $1
      exit
    }
  ' "$2" 2>/dev/null || true
}

# Echoes the repo-relative path of the plan directory whose RUNNING sentinel
# names the current branch, or nothing when none match (including a detached
# HEAD, where there is no current branch to match). When several plans
# match, disambiguates on the lexicographically latest `started:` value
# (ISO-8601 sorts correctly as a string): the most recently started run
# wins, and on equal values the first in glob order is kept. A RUNNING file
# with no `branch:` line never matches and is skipped, not an error. A
# matching file with no `started:` value is still a candidate, but ranks
# below every matching file that has one. Only the first `branch:` /
# `started:` line of a file is read. Always returns 0.
resolve_active_plan_dir() {
  local cur running_file file_branch file_started best_dir best_started

  cur="$(git branch --show-current 2>/dev/null)" || true
  [ -n "$cur" ] || return 0

  best_dir=""
  best_started=""
  # With no match the glob stays literal; the -f test below discards it.
  for running_file in .gaia/local/plans/*/RUNNING; do
    [ -f "$running_file" ] || continue

    file_branch="$(_gaia_running_field branch "$running_file")"
    [ "$file_branch" = "$cur" ] || continue

    file_started="$(_gaia_running_field started "$running_file")"
    if [ -z "$best_dir" ] || [[ "$file_started" > "$best_started" ]]; then
      best_dir="${running_file%/RUNNING}"
      best_started="$file_started"
    fi
  done

  if [ -n "$best_dir" ]; then
    printf '%s' "$best_dir"
  fi
  return 0
}

# Echoes the feature key that a plan directory's records are filed under.
# The key is looked up in three steps, first hit wins:
#   1. basename(dirname(SPEC path)), where the SPEC path is the first
#      parenthesised text on the `Derived from … (…)` line inside the
#      `## Source SPEC` section of <plan_dir>/README.md. This is the same
#      resolution the planning step uses, so a feature's spec / plan /
#      execute records all key together.
#   2. the first bare `SPEC-NNN` token on that same line, when the path is
#      missing or yields no usable directory name.
#   3. the plan directory's own basename (the slug), for a spec-less plan or
#      a plan with no README.
# Always returns 0.
#   $1 - plan directory
resolve_feature_key() {
  local plan_dir="$1"
  local doc derived spec_path candidate
  local paren_re='^[^(]*\(([^)]*)\)'
  local id_re='SPEC-[0-9]+'

  doc="$plan_dir/README.md"
  derived=""
  if [ -f "$doc" ]; then
    # First `Derived from` line between the `## Source SPEC` heading and the
    # next level-2 heading.
    derived="$(awk '
      /^## Source SPEC/ { in_section = 1; next }
      !in_section { next }
      /^## / { exit }
      /Derived from/ { print; exit }
    ' "$doc" 2>/dev/null)" || true
  fi

  # No source line at all: the slug is the key.
  if [ -z "$derived" ]; then
    basename "$plan_dir"
    return 0
  fi

  # Step 1: the directory holding the SPEC file named in parentheses.
  if [[ "$derived" =~ $paren_re ]]; then
    spec_path="${BASH_REMATCH[1]}"
    if [ -n "$spec_path" ]; then
      candidate="$(basename "$(dirname "$spec_path")" 2>/dev/null)" || true
      case "$candidate" in
        '' | . | /)
          # A path with no parent directory carries no feature name.
          ;;
        *)
          printf '%s' "$candidate"
          return 0
          ;;
      esac
    fi
  fi

  # Step 2: a bare SPEC id mentioned on the line.
  if [[ "$derived" =~ $id_re ]]; then
    printf '%s' "${BASH_REMATCH[0]}"
    return 0
  fi

  # Step 3: nothing usable on the line, fall back to the slug.
  basename "$plan_dir"
  return 0
}
94 changes: 94 additions & 0 deletions .claude/hooks/token-rollup-merge.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
#!/usr/bin/env bash
# PostToolUse hook for Bash tool calls that run `gh pr merge`. Prints the
# full-cycle token-cost roll-up (spec / plan / execute / total) for the
# feature being merged. The feature key comes from on-disk state only (the
# active plan folder, else the ledger's most recent execute record), never
# from session state, so the readout also works from a fresh top-level
# session that never ran the plan itself.
#
# Kept as a flat script on purpose: the ERR trap below is what turns any
# unexpected failure into a silent `exit 0`, and bash does not run that trap
# inside functions unless `set -E` is in effect.

set -euo pipefail
trap 'exit 0' ERR

if ! command -v jq >/dev/null 2>&1; then
  exit 0
fi

hook_input=$(cat)
tool=$(jq -r '.tool_name // ""' <<<"$hook_input")
if [ "$tool" != "Bash" ]; then
  exit 0
fi

command_line=$(jq -r '.tool_input.command // ""' <<<"$hook_input")

# `gh pr merge` counts only as a real shell invocation: at the very start of
# the command, or directly after a shell separator (&&, ;, ||, |, newline).
# A mention in prose or inside a quoted string (a commit message, say) does
# not match. A heredoc body line that starts with the command does match via
# the newline separator; that only produces a spurious readout with no
# merge, and is accepted. Mirrors pr-merge-audit-check.sh's command match.
at_start_re='^[[:space:]]*gh[[:space:]]+pr[[:space:]]+merge([[:space:]]|$)'
after_sep_re=$'(\\&\\&|;|\\|\\||\\||\n)[[:space:]]*gh[[:space:]]+pr[[:space:]]+merge([[:space:]]|$)'
if ! [[ "$command_line" =~ $at_start_re || "$command_line" =~ $after_sep_re ]]; then
  exit 0
fi

resolved_key=""
from_ledger=0

# Primary source: this branch's active plan folder, keyed exactly like the
# plan's own execute records (RUNNING sentinel + README Source SPEC). The
# folder is still there at merge time because the plan's self-cleanup only
# runs once the merge is confirmed, so the normal in-session merge resolves
# here.
if [ -f .claude/hooks/lib/gaia-active-plan.sh ]; then
  . .claude/hooks/lib/gaia-active-plan.sh
  active_plan="$(resolve_active_plan_dir)" || true
  if [ -n "$active_plan" ]; then
    resolved_key="$(resolve_feature_key "$active_plan")" || true
  fi
fi

# jq filter for the fallback: from a raw JSONL ledger, drop blank and
# unparseable lines, keep execute records that carry a spec_id, and emit the
# spec_id of the one with the greatest `ts`.
latest_execute_filter='
split("\n") | map(select(length > 0))
| map(try fromjson catch empty)
| map(select(type == "object" and .action == "execute" and (.spec_id // "") != ""))
| sort_by(.ts // "")
| last
| .spec_id // empty
'

# Fallback (best effort): a fresh session with no active plan folder in
# view, e.g. a worktree-continuation merge. The ledger is read from the main
# checkout even when running in a linked worktree, the same way
# token-tally.sh / token-rollup.sh locate it. The newest execute row is not
# guaranteed to belong to the merging feature (an interleaved earlier
# feature could be newer), so the output below is labelled accordingly.
if [ -z "$resolved_key" ]; then
  git_common=$(git rev-parse --git-common-dir 2>/dev/null || true)
  if [ -n "$git_common" ]; then
    if [[ "$git_common" == /* ]]; then
      abs_common="$git_common"
    else
      abs_common="$PWD/$git_common"
    fi
    # The main checkout is the parent of the common git directory.
    checkout_root=$(cd "$(dirname "$abs_common")" 2>/dev/null && pwd || true)
    ledger_file="$checkout_root/.gaia/local/telemetry/tokens.jsonl"
    if [ -n "$checkout_root" ] && [ -f "$ledger_file" ]; then
      resolved_key=$(jq -R -s -r "$latest_execute_filter" "$ledger_file" 2>/dev/null || true)
      if [ -n "$resolved_key" ]; then
        from_ledger=1
      fi
    fi
  fi
fi

if [ -z "$resolved_key" ]; then
  exit 0
fi

report=$(bash .gaia/scripts/token-rollup.sh --spec-id "$resolved_key" 2>/dev/null || true)
if [ -z "$report" ]; then
  exit 0
fi

heading='[cycle cost at merge]'
if [ "$from_ledger" -eq 1 ]; then
  heading="[cycle cost at merge - feature key resolved from the ledger's most recent execution; no active plan folder was found]"
fi
printf '%s\n%s\n' "$heading" "$report"

exit 0
65 changes: 65 additions & 0 deletions .claude/hooks/token-tally-git-op.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/usr/bin/env bash
# PreToolUse Bash hook: records this execution session's ground-truth token
# tally on the orchestrator's per-phase git commit/push, so a resumed or
# worktree session is captured deterministically instead of depending on a
# session-scoped prose instruction. Gated on an active plan folder (a
# RUNNING sentinel whose branch matches the current branch) and keyed to
# that plan's feature. This hook only performs a side effect: it never
# blocks the git operation and never emits a permission decision. Every
# exit path is `exit 0`; the ERR trap below converts any unexpected failure
# into a silent success.

set -euo pipefail
trap 'exit 0' ERR

command -v jq >/dev/null 2>&1 || exit 0

payload=$(cat)
tool_name=$(jq -r '.tool_name // ""' <<<"$payload")
[ "$tool_name" = "Bash" ] || exit 0

cmd=$(jq -r '.tool_input.command // ""' <<<"$payload")

# Match `git commit` or `git push` as a real shell invocation, at command
# start or right after a shell separator (&&, ;, ||, |, newline), not when
# mentioned mid-line in prose or a quoted string (e.g. a commit message).
# Bash `=~` gives whole-string semantics; `grep` is line-oriented and would
# match every heredoc body line. The newline separator here still matches a
# heredoc body line that begins with the command; that edge is benign (one
# extra tally row the per-session dedup collapses) and accepted.
start_re='^[[:space:]]*git[[:space:]]+(commit|push)([[:space:]]|$)'
sep_re=$'(\\&\\&|;|\\|\\||\\||\n)[[:space:]]*git[[:space:]]+(commit|push)([[:space:]]|$)'
if [[ "$cmd" =~ $start_re ]]; then
  :
elif [[ "$cmd" =~ $sep_re ]]; then
  :
else
  exit 0
fi

# Cheap negative gate: no plan folder at all, skip before sourcing the
# resolver lib or paying for token-tally.sh's transcript parse.
has_plan=0
for d in .gaia/local/plans/*/; do
  [ -d "$d" ] || continue
  has_plan=1
  break
done
[ "$has_plan" -eq 1 ] || exit 0

# Without the resolver lib there is nothing to key the tally to, so skip
# quietly. This is the same `-f` guard token-rollup-merge.sh uses; sourcing
# a missing file would otherwise print a shell error to the hook's stderr
# before the ERR trap exits.
resolver_lib=.claude/hooks/lib/gaia-active-plan.sh
[ -f "$resolver_lib" ] || exit 0
# shellcheck source=/dev/null
. "$resolver_lib"

plan_dir="$(resolve_active_plan_dir)"
[ -n "$plan_dir" ] || exit 0

feature_key="$(resolve_feature_key "$plan_dir")"
slug="$(basename "$plan_dir")"
sid=$(jq -r '.session_id // ""' <<<"$payload")

# GAIA_TALLY_PROJECTS_ROOT is a documented test seam: unset in production
# (token-tally.sh falls back to its $HOME/.claude/projects default), set by
# bats to point at a fixture so no test run ever touches a real session's
# transcript search path. The `${VAR:+…}` expansion is left unquoted on
# purpose so it yields either two words (flag and value) or none at all.
# Output is discarded and failure ignored: the tally is best-effort.
bash .gaia/scripts/token-tally.sh \
  --action execute --spec-id "$feature_key" --plan-slug "$slug" \
  --out-dir "$plan_dir" --session-id "$sid" \
  ${GAIA_TALLY_PROJECTS_ROOT:+--projects-root "$GAIA_TALLY_PROJECTS_ROOT"} >/dev/null 2>&1 || true

exit 0
9 changes: 9 additions & 0 deletions .claude/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,11 @@
"command": ".claude/hooks/pr-merge-audit-check.sh",
"statusMessage": "Checking PR-merge audit gate…"
},
{
"type": "command",
"command": ".claude/hooks/token-tally-git-op.sh",
"statusMessage": "Recording execute-phase token tally…"
},
{
"type": "command",
"command": ".claude/hooks/worthiness-presence-check.sh",
Expand Down Expand Up @@ -179,6 +184,10 @@
"type": "command",
"command": ".claude/hooks/wiki-commit-nudge.sh"
},
{
"type": "command",
"command": ".claude/hooks/token-rollup-merge.sh"
},
{
"type": "command",
"command": ".claude/hooks/capture-red-observations.sh"
Expand Down
15 changes: 7 additions & 8 deletions .claude/skills/gaia/references/plan.md
Original file line number Diff line number Diff line change
Expand Up @@ -190,19 +190,18 @@ Then write the following files directly to `{PLAN_DIR}/`:
- **Stop conditions.** On any sub-agent failure or quality-gate failure: STOP and surface to the user. Do not "fix and continue", do not commit, do not push. Before stopping, append the failure context (which phase, which sub-agent, error) to `SUMMARY.md` under a `## Phase N, <title> (HALTED)` block so the user and any follow-up session see the same record.
- **Final summary.** After all implementation phases pass and the final commit is pushed, before awaiting merge confirmation, **read `{PLAN_DIR}/SUMMARY.md`** and print a brief summary to the user: phases completed, sub-agents run, files touched (count), commits pushed (count + short SHAs), PR URL, quality-gate status, and the highest-signal findings/deviations/follow-ups drawn from `SUMMARY.md` so nothing is lost to context compression. Keep it tight, a few lines plus the surfaced notes, not a recap of every change.

**Token tally (execute-time).** After the pre-merge `code-review-audit`'s clean-pass marker is written and before the Final self-cleanup phase deletes the plan folder, the orchestrator runs the token tally for this KICKOFF execution and reports the printed four-bucket total and wall-clock elapsed to the user, so the run's dominant sub-agent fan-outs (including the pre-merge audit) are all counted, and the `--out-dir` still exists (the ledger itself lives in the main checkout and survives cleanup). Substitute the plan's real SPEC id (from the `## Source SPEC` section of `README.md`, or the plan slug if the plan has no SPEC), the real plan slug, and the absolute plan directory:
**Token tally (execute-time).** Execute-phase token tallies are recorded automatically: a `PreToolUse` hook on the orchestrator's per-phase git commit/push records this session's execute tally to the durable ledger, keyed to the feature (the SPEC id resolved from the active plan folder, or the plan slug when spec-less). Resumed, halted, and worktree sessions are all captured. The orchestrator does not run a manual execute tally; doing so would double-count the phase.

After the pre-merge `code-review-audit`'s clean-pass marker is written and before the Final self-cleanup phase deletes the plan folder, the orchestrator reports the full-cycle cost by running the roll-up reader and surfacing its spec / plan / execute / total breakdown plus wall-clock elapsed to the user. Substitute the plan's real SPEC id (from the `## Source SPEC` section of `README.md`, or the plan slug if the plan has no SPEC):

```bash
if [ -x .gaia/scripts/token-tally.sh ]; then
bash .gaia/scripts/token-tally.sh \
--action execute \
--spec-id "<SPEC-NNN from README's Source SPEC, or the plan slug if none>" \
--plan-slug "<plan slug = basename of the plan dir>" \
--out-dir "<absolute plan dir>" || true
if [ -x .gaia/scripts/token-rollup.sh ]; then
bash .gaia/scripts/token-rollup.sh \
--spec-id "<SPEC-NNN from README's Source SPEC, or the plan slug if none>" || true
fi
```

This attributes the whole execution session (main transcript plus every phase sub-agent sidecar, deduped to ground truth) to the plan, appends a durable ledger record keyed to the plan, and reports the tally to the user. It never blocks: the `-x` guard and trailing `|| true` mean a missing or failing helper degrades silently. Because the tally runs after the audit's sub-agent fan-out and counts every sidecar, the reported total is at least the sidecar-only sum.
A `PostToolUse` hook on `gh pr merge` renders the same roll-up at the merge boundary, so the readout also appears when the merge runs from a fresh top-level session. The reader never blocks and never fabricates a number: the `-x` guard and trailing `|| true` mean a missing or failing helper degrades silently, and an unreadable ledger degrades to a partial or absent figure with a marker.
- **Final self-cleanup phase (last step before merge).** After all implementation phases pass and the user has reviewed the PR and confirmed it is ready to merge, the orchestrator deletes its own plan folder so scaffolding does not persist locally. Delete it by its literal repo-relative path: `rm -rf .gaia/local/plans/<slug>` (substitute the plan's slug). The literal path matches the project's `rm -rf .gaia/local/plans/*` permission and the `block-rm-rf.sh` whitelist, so it clears without a prompt; do not reconstruct an absolute path from variables (`"$ROOT/$PLAN_REL"`), which both misses that permission match and trips the empty-variable rm guard. This removes `SUMMARY.md` along with everything else, by this point its content has already been surfaced in the Final summary. Then check `git check-ignore .gaia/local/plans/`, if it is gitignored (the GAIA default), the deletion is invisible to git: skip the commit and report "plan folder removed locally; gitignored, no commit needed." If the path is tracked, commit and push the deletion as the final commit on the PR. If the user explicitly asks to keep the plan folder for archival, the orchestrator skips the deletion and reports.
- **Post-merge worktree cleanup (worktree-mode runs only).** When the orchestrator's pre-flight chose worktree mode (or the run was dispatched into a worktree by upstream tooling), the post-merge phase runs the cleanup procedure below AFTER the user confirms the PR is merged. The procedure detects the squash-merge state and discards the worktree without prompting (the SPEC clarifications.answered confirms pre-consent: the orchestrator told the user "after merge, the worktree will be discarded" before opening the PR; the user merging the PR is the consent).
1. Confirm merge via `gh pr view <N> --json state`. Parse the JSON; require `.state == "MERGED"`. If not merged, do NOT proceed, surface to user and stop.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"action":"spec","spec_id":"SPEC-231","session_id":"sp1","buckets":{"fresh_input":300,"cache_write":600,"cache_read":1800,"output":300},"total":3000,"partial":false,"started_at":"2026-05-04T09:00:00.000Z","ended_at":"2026-05-04T09:01:00.000Z","duration_seconds":60,"duration_available":true,"ts":"2026-05-04T09:01:00Z"}
42
{"action":"plan","spec_id":"SPEC-231","plan_slug":"spec-231-slug","session_id":"pl1","buckets":{"fresh_input":400,"cache_write":800,"cache_read":2400,"output":400},"total":4000,"partial":false,"started_at":"2026-05-04T09:05:00.000Z","ended_at":"2026-05-04T09:07:00.000Z","duration_seconds":120,"duration_available":true,"ts":"2026-05-04T09:07:00Z"}
[1, 2]
3 changes: 3 additions & 0 deletions .gaia/scripts/tests/fixtures/token-rollup/corrupt.jsonl
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{"action":"spec","spec_id":"SPEC-230","session_id":"sp1","buckets":{"fresh_input":500,"cache_write":1000,"cache_read":3000,"output":500},"total":5000,"partial":false,"started_at":"2026-05-03T09:00:00.000Z","ended_at":"2026-05-03T09:01:00.000Z","duration_seconds":60,"duration_available":true,"ts":"2026-05-03T09:01:00Z"}
this is not json {{{
{"action":"plan","spec_id":"SPEC-230","plan_slug":"spec-230-slug","session_id":"pl1","buckets":{"fresh_input":600,"cache_write":1200,"cache_read":3600,"output":600},"total":6000,"partial":false,"started_at":"2026-05-03T09:05:00.000Z","ended_at":"2026-05-03T09:06:30.000Z","duration_seconds":90,"duration_available":true,"ts":"2026-05-03T09:06:30Z"}
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{"action":"execute","spec_id":"SPEC-210","plan_slug":"spec-210-slug","session_id":"sess-s1","buckets":{"fresh_input":10000,"cache_write":50000,"cache_read":900000,"output":40000},"total":1000000,"started_at":"2026-04-01T09:50:00.000Z","ended_at":"2026-04-01T10:00:00.000Z","duration_seconds":600,"duration_available":true,"ts":"2026-04-01T10:00:00Z"}
{"action":"execute","spec_id":"SPEC-210","plan_slug":"spec-210-slug","session_id":"sess-s1","buckets":{"fresh_input":15000,"cache_write":75000,"cache_read":1350000,"output":60000},"total":1500000,"started_at":"2026-04-01T09:50:00.000Z","ended_at":"2026-04-01T10:10:00.000Z","duration_seconds":1200,"duration_available":true,"ts":"2026-04-01T10:10:00Z"}
{"action":"execute","spec_id":"SPEC-210","plan_slug":"spec-210-slug","session_id":"sess-s2","buckets":{"fresh_input":20000,"cache_write":100000,"cache_read":1800000,"output":80000},"total":2000000,"partial":false,"started_at":"2026-04-01T14:00:00.000Z","ended_at":"2026-04-01T14:15:00.000Z","duration_seconds":900,"duration_available":true,"ts":"2026-04-01T14:15:00Z"}
{"action":"execute","spec_id":"SPEC-210","plan_slug":"spec-210-slug","session_id":"sess-s2","buckets":{"fresh_input":26000,"cache_write":130000,"cache_read":2340000,"output":104000},"total":2600000,"partial":false,"started_at":"2026-04-01T14:00:00.000Z","ended_at":"2026-04-01T14:25:00.000Z","duration_seconds":1500,"duration_available":true,"ts":"2026-04-01T14:25:00Z"}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{"action":"execute","spec_id":"SPEC-204","plan_slug":"spec-204-slug","session_id":"sess1","buckets":{"fresh_input":3000,"cache_write":15000,"cache_read":270000,"output":12000},"total":300000,"partial":true,"started_at":"2026-03-04T09:50:00.000Z","ended_at":"2026-03-04T10:00:00.000Z","duration_seconds":150,"duration_available":true,"ts":"2026-03-04T10:00:00Z"}
{"action":"execute","spec_id":"SPEC-204","plan_slug":"spec-204-slug","session_id":"sess1","buckets":{"fresh_input":5000,"cache_write":25000,"cache_read":450000,"output":20000},"total":500000,"partial":true,"started_at":"2026-03-04T09:50:00.000Z","ended_at":"2026-03-04T10:05:00.000Z","duration_seconds":250,"duration_available":true,"ts":"2026-03-04T10:05:00Z"}
Loading