From c7bbba9834b16c09412be2ff6bc19979c53e24ae Mon Sep 17 00:00:00 2001 From: Steven Sacks Date: Fri, 3 Jul 2026 21:45:03 +0900 Subject: [PATCH 1/7] =?UTF-8?q?feat(gaia):=20SPEC-017=20phase=201=20?= =?UTF-8?q?=E2=80=94=20token=20roll-up=20reader=20+=20git-op=20recording?= =?UTF-8?q?=20hook?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add .gaia/scripts/token-rollup.sh: reads the token ledger, dedups each session's re-invocation rows (non-partial, max total, tiebreak latest ended_at), and renders the spec/plan/execute/total cycle breakdown with summed active-span elapsed and the four billing buckets. Always exits 0; degrades to a partial/absent figure with a marker, never fabricates. Add .claude/hooks/token-tally-git-op.sh (PreToolUse Bash) plus shared resolver lib .claude/hooks/lib/gaia-active-plan.sh: record this execution session's ground-truth tally on the orchestrator's per-phase git commit/push, gated on an active plan folder, keyed to the feature. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .claude/hooks/lib/gaia-active-plan.sh | 84 +++++ .claude/hooks/token-tally-git-op.sh | 63 ++++ .../tests/fixtures/token-rollup/corrupt.jsonl | 3 + .../fixtures/token-rollup/cross-session.jsonl | 4 + .../token-rollup/dedup-all-partial.jsonl | 2 + .../fixtures/token-rollup/dedup-inflate.jsonl | 2 + .../fixtures/token-rollup/dedup-lower.jsonl | 3 + .../token-rollup/dedup-tiebreak.jsonl | 2 + .../fixtures/token-rollup/full-cycle.jsonl | 4 + .../fixtures/token-rollup/spec-less.jsonl | 2 + .../token-rollup/unavailable-elapsed.jsonl | 1 + .gaia/scripts/tests/token-rollup.bats | 253 +++++++++++++++ .gaia/scripts/token-rollup.sh | 244 +++++++++++++++ .gaia/tests/hooks/token-tally-git-op.bats | 289 ++++++++++++++++++ 14 files changed, 956 insertions(+) create mode 100755 .claude/hooks/lib/gaia-active-plan.sh create mode 100755 .claude/hooks/token-tally-git-op.sh create mode 100644 .gaia/scripts/tests/fixtures/token-rollup/corrupt.jsonl create mode 100644 .gaia/scripts/tests/fixtures/token-rollup/cross-session.jsonl create mode 100644 .gaia/scripts/tests/fixtures/token-rollup/dedup-all-partial.jsonl create mode 100644 .gaia/scripts/tests/fixtures/token-rollup/dedup-inflate.jsonl create mode 100644 .gaia/scripts/tests/fixtures/token-rollup/dedup-lower.jsonl create mode 100644 .gaia/scripts/tests/fixtures/token-rollup/dedup-tiebreak.jsonl create mode 100644 .gaia/scripts/tests/fixtures/token-rollup/full-cycle.jsonl create mode 100644 .gaia/scripts/tests/fixtures/token-rollup/spec-less.jsonl create mode 100644 .gaia/scripts/tests/fixtures/token-rollup/unavailable-elapsed.jsonl create mode 100644 .gaia/scripts/tests/token-rollup.bats create mode 100755 .gaia/scripts/token-rollup.sh create mode 100644 .gaia/tests/hooks/token-tally-git-op.bats diff --git a/.claude/hooks/lib/gaia-active-plan.sh b/.claude/hooks/lib/gaia-active-plan.sh new file mode 100755 index 00000000..b6140938 --- /dev/null +++ 
b/.claude/hooks/lib/gaia-active-plan.sh @@ -0,0 +1,84 @@ +#!/usr/bin/env bash +# Shared, sourced resolver for the plan folder and feature key backing this +# branch's active KICKOFF execution. Hooks source this to key a side effect +# (a tally record, a roll-up render) to the right plan without depending on +# session-scoped state. Pure and side-effect-free; every function always +# returns 0, even when nothing resolves or the repo has no plans directory +# at all. +# +# Usage: +# . .claude/hooks/lib/gaia-active-plan.sh +# plan_dir="$(resolve_active_plan_dir)" +# [ -n "$plan_dir" ] && feature_key="$(resolve_feature_key "$plan_dir")" + +# Echoes the repo-relative path of the plan directory whose RUNNING sentinel +# names the current branch, or nothing when none match. When several plans +# match, disambiguates on the lexicographically latest `started:` value +# (ISO-8601 sorts correctly as a string): the most recently started run +# wins. A RUNNING file with no `branch:` line is skipped; one with no +# `started:` line still matches but ranks below any dated candidate. 
+resolve_active_plan_dir() { + local cur running_file file_branch file_started best_dir best_started + + cur="$(git branch --show-current 2>/dev/null)" || true + [ -n "$cur" ] || return 0 + + best_dir="" + best_started="" + for running_file in .gaia/local/plans/*/RUNNING; do + [ -f "$running_file" ] || continue + + file_branch="$(grep '^branch:' "$running_file" 2>/dev/null | cut -d' ' -f2)" || true + [ "$file_branch" = "$cur" ] || continue + + file_started="$(grep '^started:' "$running_file" 2>/dev/null | cut -d' ' -f2)" || true + if [ -z "$best_dir" ] || [[ "$file_started" > "$best_started" ]]; then + best_dir="$(dirname "$running_file")" + best_started="$file_started" + fi + done + + [ -n "$best_dir" ] && printf '%s' "$best_dir" + return 0 +} + +# Echoes the feature key for a plan directory: basename(dirname(SPEC path)), +# read from the `Derived from … (…)` line inside $plan_dir/README.md's +# `## Source SPEC` section (the same resolution the planning step uses, so +# a feature's spec / plan / execute records all key together). Falls back to +# a bare `SPEC-NNN` scan of that line when the path is unparseable, and +# ultimately to the plan directory's own basename (the slug) for a spec-less +# plan. +resolve_feature_key() { + local plan_dir="$1" readme source_line path key + + readme="$plan_dir/README.md" + source_line="" + if [ -f "$readme" ]; then + source_line="$(awk ' + /^## Source SPEC/ { insec=1; next } + insec && /^## / { exit } + insec && /Derived from/ { print; exit } + ' "$readme" 2>/dev/null)" || true + fi + + if [ -n "$source_line" ]; then + path="$(printf '%s' "$source_line" | sed -nE 's/^[^(]*\(([^)]*)\).*/\1/p')" || true + if [ -n "$path" ]; then + key="$(basename "$(dirname "$path")" 2>/dev/null)" || true + if [ -n "$key" ] && [ "$key" != "." 
] && [ "$key" != "/" ]; then + printf '%s' "$key" + return 0 + fi + fi + + key="$(printf '%s' "$source_line" | grep -oE 'SPEC-[0-9]+' | head -1)" || true + if [ -n "$key" ]; then + printf '%s' "$key" + return 0 + fi + fi + + basename "$plan_dir" + return 0 +} diff --git a/.claude/hooks/token-tally-git-op.sh b/.claude/hooks/token-tally-git-op.sh new file mode 100755 index 00000000..17a492f1 --- /dev/null +++ b/.claude/hooks/token-tally-git-op.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash +# PreToolUse Bash hook: records this execution session's ground-truth token +# tally on the orchestrator's per-phase git commit/push, so a resumed or +# worktree session is captured deterministically instead of depending on a +# session-scoped prose instruction. Gated on an active plan folder (a +# RUNNING sentinel whose branch matches the current branch) and keyed to +# that plan's feature. This hook only performs a side effect: it never +# blocks the git operation and never emits a permission decision. + +set -euo pipefail +trap 'exit 0' ERR + +command -v jq >/dev/null 2>&1 || exit 0 + +payload=$(cat) +tool_name=$(jq -r '.tool_name // ""' <<<"$payload") +[ "$tool_name" = "Bash" ] || exit 0 + +cmd=$(jq -r '.tool_input.command // ""' <<<"$payload") + +# Match `git commit` or `git push` at command start or right after a shell +# separator (&&, ;, ||, |, newline); a mid-line prose mention (e.g. inside +# a commit message) does not fire. Bash `=~` anchors `^` to the whole string +# where line-oriented `grep` would not. NOTE(review): a heredoc or quoted +# line that itself begins with the command still matches via the newline arm. 
+start_re='^[[:space:]]*git[[:space:]]+(commit|push)([[:space:]]|$)' +sep_re=$'(\\&\\&|;|\\|\\||\\||\n)[[:space:]]*git[[:space:]]+(commit|push)([[:space:]]|$)' +if [[ "$cmd" =~ $start_re ]]; then + : +elif [[ "$cmd" =~ $sep_re ]]; then + : +else + exit 0 +fi + +# Cheap negative gate: no plan folder at all, skip before sourcing the +# resolver lib or paying for token-tally.sh's transcript parse. +has_plan=0 +for d in .gaia/local/plans/*/; do + [ -d "$d" ] || continue + has_plan=1 + break +done +[ "$has_plan" -eq 1 ] || exit 0 + +. .claude/hooks/lib/gaia-active-plan.sh +plan_dir="$(resolve_active_plan_dir)" +[ -n "$plan_dir" ] || exit 0 + +feature_key="$(resolve_feature_key "$plan_dir")" +slug="$(basename "$plan_dir")" +sid=$(jq -r '.session_id // ""' <<<"$payload") + +# GAIA_TALLY_PROJECTS_ROOT is a documented test seam: unset in production +# (token-tally.sh falls back to its $HOME/.claude/projects default), set by +# bats to point at a fixture so no test run ever touches a real session's +# transcript search path. 
+bash .gaia/scripts/token-tally.sh \ + --action execute --spec-id "$feature_key" --plan-slug "$slug" \ + --out-dir "$plan_dir" --session-id "$sid" \ + ${GAIA_TALLY_PROJECTS_ROOT:+--projects-root "$GAIA_TALLY_PROJECTS_ROOT"} >/dev/null 2>&1 || true + +exit 0 diff --git a/.gaia/scripts/tests/fixtures/token-rollup/corrupt.jsonl b/.gaia/scripts/tests/fixtures/token-rollup/corrupt.jsonl new file mode 100644 index 00000000..057cccba --- /dev/null +++ b/.gaia/scripts/tests/fixtures/token-rollup/corrupt.jsonl @@ -0,0 +1,3 @@ +{"action":"spec","spec_id":"SPEC-230","session_id":"sp1","buckets":{"fresh_input":500,"cache_write":1000,"cache_read":3000,"output":500},"total":5000,"partial":false,"started_at":"2026-05-03T09:00:00.000Z","ended_at":"2026-05-03T09:01:00.000Z","duration_seconds":60,"duration_available":true,"ts":"2026-05-03T09:01:00Z"} +this is not json {{{ +{"action":"plan","spec_id":"SPEC-230","plan_slug":"spec-230-slug","session_id":"pl1","buckets":{"fresh_input":600,"cache_write":1200,"cache_read":3600,"output":600},"total":6000,"partial":false,"started_at":"2026-05-03T09:05:00.000Z","ended_at":"2026-05-03T09:06:30.000Z","duration_seconds":90,"duration_available":true,"ts":"2026-05-03T09:06:30Z"} diff --git a/.gaia/scripts/tests/fixtures/token-rollup/cross-session.jsonl b/.gaia/scripts/tests/fixtures/token-rollup/cross-session.jsonl new file mode 100644 index 00000000..cb280d6c --- /dev/null +++ b/.gaia/scripts/tests/fixtures/token-rollup/cross-session.jsonl @@ -0,0 +1,4 @@ +{"action":"execute","spec_id":"SPEC-210","plan_slug":"spec-210-slug","session_id":"sess-s1","buckets":{"fresh_input":10000,"cache_write":50000,"cache_read":900000,"output":40000},"total":1000000,"started_at":"2026-04-01T09:50:00.000Z","ended_at":"2026-04-01T10:00:00.000Z","duration_seconds":600,"duration_available":true,"ts":"2026-04-01T10:00:00Z"} 
+{"action":"execute","spec_id":"SPEC-210","plan_slug":"spec-210-slug","session_id":"sess-s1","buckets":{"fresh_input":15000,"cache_write":75000,"cache_read":1350000,"output":60000},"total":1500000,"started_at":"2026-04-01T09:50:00.000Z","ended_at":"2026-04-01T10:10:00.000Z","duration_seconds":1200,"duration_available":true,"ts":"2026-04-01T10:10:00Z"} +{"action":"execute","spec_id":"SPEC-210","plan_slug":"spec-210-slug","session_id":"sess-s2","buckets":{"fresh_input":20000,"cache_write":100000,"cache_read":1800000,"output":80000},"total":2000000,"partial":false,"started_at":"2026-04-01T14:00:00.000Z","ended_at":"2026-04-01T14:15:00.000Z","duration_seconds":900,"duration_available":true,"ts":"2026-04-01T14:15:00Z"} +{"action":"execute","spec_id":"SPEC-210","plan_slug":"spec-210-slug","session_id":"sess-s2","buckets":{"fresh_input":26000,"cache_write":130000,"cache_read":2340000,"output":104000},"total":2600000,"partial":false,"started_at":"2026-04-01T14:00:00.000Z","ended_at":"2026-04-01T14:25:00.000Z","duration_seconds":1500,"duration_available":true,"ts":"2026-04-01T14:25:00Z"} diff --git a/.gaia/scripts/tests/fixtures/token-rollup/dedup-all-partial.jsonl b/.gaia/scripts/tests/fixtures/token-rollup/dedup-all-partial.jsonl new file mode 100644 index 00000000..a0e45cba --- /dev/null +++ b/.gaia/scripts/tests/fixtures/token-rollup/dedup-all-partial.jsonl @@ -0,0 +1,2 @@ +{"action":"execute","spec_id":"SPEC-204","plan_slug":"spec-204-slug","session_id":"sess1","buckets":{"fresh_input":3000,"cache_write":15000,"cache_read":270000,"output":12000},"total":300000,"partial":true,"started_at":"2026-03-04T09:50:00.000Z","ended_at":"2026-03-04T10:00:00.000Z","duration_seconds":150,"duration_available":true,"ts":"2026-03-04T10:00:00Z"} 
+{"action":"execute","spec_id":"SPEC-204","plan_slug":"spec-204-slug","session_id":"sess1","buckets":{"fresh_input":5000,"cache_write":25000,"cache_read":450000,"output":20000},"total":500000,"partial":true,"started_at":"2026-03-04T09:50:00.000Z","ended_at":"2026-03-04T10:05:00.000Z","duration_seconds":250,"duration_available":true,"ts":"2026-03-04T10:05:00Z"} diff --git a/.gaia/scripts/tests/fixtures/token-rollup/dedup-inflate.jsonl b/.gaia/scripts/tests/fixtures/token-rollup/dedup-inflate.jsonl new file mode 100644 index 00000000..bda6c0e7 --- /dev/null +++ b/.gaia/scripts/tests/fixtures/token-rollup/dedup-inflate.jsonl @@ -0,0 +1,2 @@ +{"action":"execute","spec_id":"SPEC-202","plan_slug":"spec-202-slug","session_id":"sess1","buckets":{"fresh_input":7000,"cache_write":35000,"cache_read":630000,"output":28000},"total":700000,"partial":false,"started_at":"2026-03-02T09:50:00.000Z","ended_at":"2026-03-02T10:00:00.000Z","duration_seconds":400,"duration_available":true,"ts":"2026-03-02T10:00:00Z"} +{"action":"execute","spec_id":"SPEC-202","plan_slug":"spec-202-slug","session_id":"sess1","buckets":{"fresh_input":12000,"cache_write":60000,"cache_read":1080000,"output":48000},"total":1200000,"partial":true,"started_at":"2026-03-02T09:50:00.000Z","ended_at":"2026-03-02T10:20:00.000Z","duration_seconds":900,"duration_available":true,"ts":"2026-03-02T10:20:00Z"} diff --git a/.gaia/scripts/tests/fixtures/token-rollup/dedup-lower.jsonl b/.gaia/scripts/tests/fixtures/token-rollup/dedup-lower.jsonl new file mode 100644 index 00000000..0dfb3beb --- /dev/null +++ b/.gaia/scripts/tests/fixtures/token-rollup/dedup-lower.jsonl @@ -0,0 +1,3 @@ 
+{"action":"execute","spec_id":"SPEC-201","plan_slug":"spec-201-slug","session_id":"sess1","buckets":{"fresh_input":10000,"cache_write":50000,"cache_read":900000,"output":40000},"total":1000000,"partial":false,"started_at":"2026-03-01T09:50:00.000Z","ended_at":"2026-03-01T10:00:00.000Z","duration_seconds":500,"duration_available":true,"ts":"2026-03-01T10:00:00Z"} +{"action":"execute","spec_id":"SPEC-201","plan_slug":"spec-201-slug","session_id":"sess1","buckets":{"fresh_input":14000,"cache_write":70000,"cache_read":1260000,"output":56000},"total":1400000,"started_at":"2026-03-01T09:50:00.000Z","ended_at":"2026-03-01T10:05:00.000Z","duration_seconds":700,"duration_available":true,"ts":"2026-03-01T10:05:00Z"} +{"action":"execute","spec_id":"SPEC-201","plan_slug":"spec-201-slug","session_id":"sess1","buckets":{"fresh_input":9000,"cache_write":45000,"cache_read":810000,"output":36000},"total":900000,"partial":true,"started_at":"2026-03-01T09:50:00.000Z","ended_at":"2026-03-01T10:10:00.000Z","duration_seconds":300,"duration_available":true,"ts":"2026-03-01T10:10:00Z"} diff --git a/.gaia/scripts/tests/fixtures/token-rollup/dedup-tiebreak.jsonl b/.gaia/scripts/tests/fixtures/token-rollup/dedup-tiebreak.jsonl new file mode 100644 index 00000000..515c77a8 --- /dev/null +++ b/.gaia/scripts/tests/fixtures/token-rollup/dedup-tiebreak.jsonl @@ -0,0 +1,2 @@ +{"action":"execute","spec_id":"SPEC-203","plan_slug":"spec-203-slug","session_id":"sess1","buckets":{"fresh_input":5000,"cache_write":25000,"cache_read":450000,"output":20000},"total":500000,"partial":false,"started_at":"2026-03-03T09:50:00.000Z","ended_at":"2026-03-03T10:00:00.000Z","duration_seconds":100,"duration_available":true,"ts":"2026-03-03T10:00:00Z"} 
+{"action":"execute","spec_id":"SPEC-203","plan_slug":"spec-203-slug","session_id":"sess1","buckets":{"fresh_input":5000,"cache_write":25000,"cache_read":450000,"output":20000},"total":500000,"partial":false,"started_at":"2026-03-03T09:50:00.000Z","ended_at":"2026-03-03T11:00:00.000Z","duration_seconds":200,"duration_available":true,"ts":"2026-03-03T11:00:00Z"} diff --git a/.gaia/scripts/tests/fixtures/token-rollup/full-cycle.jsonl b/.gaia/scripts/tests/fixtures/token-rollup/full-cycle.jsonl new file mode 100644 index 00000000..a960e5b4 --- /dev/null +++ b/.gaia/scripts/tests/fixtures/token-rollup/full-cycle.jsonl @@ -0,0 +1,4 @@ +{"action":"spec","spec_id":"SPEC-220","session_id":"sp-sess","buckets":{"fresh_input":1000,"cache_write":2000,"cache_read":30000,"output":4000},"total":37000,"partial":false,"started_at":"2026-05-01T08:50:00.000Z","ended_at":"2026-05-01T09:00:00.000Z","duration_seconds":600,"duration_available":true,"ts":"2026-05-01T09:00:00Z"} +{"action":"plan","spec_id":"SPEC-220","plan_slug":"spec-220-slug","session_id":"pl-sess","buckets":{"fresh_input":1100,"cache_write":2100,"cache_read":31000,"output":4100},"total":38300,"partial":false,"started_at":"2026-05-01T09:08:20.000Z","ended_at":"2026-05-01T09:20:00.000Z","duration_seconds":700,"duration_available":true,"ts":"2026-05-01T09:20:00Z"} +{"action":"execute","spec_id":"SPEC-220","plan_slug":"spec-220-slug","session_id":"ex-sess","buckets":{"fresh_input":1200,"cache_write":2200,"cache_read":32000,"output":4200},"total":39600,"partial":false,"started_at":"2026-05-01T09:31:40.000Z","ended_at":"2026-05-01T09:45:00.000Z","duration_seconds":800,"duration_available":true,"ts":"2026-05-01T09:45:00Z"} 
+{"action":"execute","spec_id":"SPEC-999","plan_slug":"unrelated-slug","session_id":"noise-sess","buckets":{"fresh_input":9999999,"cache_write":9999999,"cache_read":9999999,"output":9999999},"total":39999996,"partial":false,"started_at":"2026-05-01T09:00:00.000Z","ended_at":"2026-05-01T09:01:00.000Z","duration_seconds":60,"duration_available":true,"ts":"2026-05-01T09:01:00Z"} diff --git a/.gaia/scripts/tests/fixtures/token-rollup/spec-less.jsonl b/.gaia/scripts/tests/fixtures/token-rollup/spec-less.jsonl new file mode 100644 index 00000000..a0d65a7f --- /dev/null +++ b/.gaia/scripts/tests/fixtures/token-rollup/spec-less.jsonl @@ -0,0 +1,2 @@ +{"action":"plan","spec_id":"spec-less-slug-example","plan_slug":"spec-less-slug-example","session_id":"pl1","buckets":{"fresh_input":10,"cache_write":20,"cache_read":300,"output":40},"total":370,"partial":false,"started_at":"2026-05-02T09:00:00.000Z","ended_at":"2026-05-02T09:01:00.000Z","duration_seconds":60,"duration_available":true,"ts":"2026-05-02T09:01:00Z"} +{"action":"execute","spec_id":"spec-less-slug-example","plan_slug":"spec-less-slug-example","session_id":"ex1","buckets":{"fresh_input":11,"cache_write":22,"cache_read":330,"output":44},"total":407,"partial":false,"started_at":"2026-05-02T09:05:00.000Z","ended_at":"2026-05-02T09:06:30.000Z","duration_seconds":90,"duration_available":true,"ts":"2026-05-02T09:06:30Z"} diff --git a/.gaia/scripts/tests/fixtures/token-rollup/unavailable-elapsed.jsonl b/.gaia/scripts/tests/fixtures/token-rollup/unavailable-elapsed.jsonl new file mode 100644 index 00000000..5ad4ab34 --- /dev/null +++ b/.gaia/scripts/tests/fixtures/token-rollup/unavailable-elapsed.jsonl @@ -0,0 +1 @@ +{"action":"execute","spec_id":"SPEC-250","plan_slug":"spec-250-slug","session_id":"ex1","buckets":{"fresh_input":1000,"cache_write":2000,"cache_read":4000,"output":1000},"total":8000,"partial":false,"started_at":null,"ended_at":null,"duration_seconds":null,"duration_available":false,"ts":"2026-05-04T09:00:00Z"} 
diff --git a/.gaia/scripts/tests/token-rollup.bats b/.gaia/scripts/tests/token-rollup.bats new file mode 100644 index 00000000..25aff8e9 --- /dev/null +++ b/.gaia/scripts/tests/token-rollup.bats @@ -0,0 +1,253 @@ +#!/usr/bin/env bats +# +# Requires Bats >= 1.5.0 (this suite uses `run --separate-stderr`, added in 1.5). +bats_require_minimum_version 1.5.0 +# +# Bats suite for .gaia/scripts/token-rollup.sh (SPEC-017 task-rollup-reader). +# +# Every fixture ledger under fixtures/token-rollup/ is a HAND-AUTHORED oracle: +# expected sums/spans are computed by hand in the comments below, never by +# running the reader first and copying its output. +# +# Fixture inventory + hand-computed oracles: +# +# dedup-lower.jsonl (SPEC-201, one session "sess1", 3 execute rows) +# r1 total=1000000 partial:false +# r2 total=1400000 NO partial key at all (missing -> non-partial/final) +# r3 total=900000 partial:true, LATER ended_at, LOWER total +# non-partial pool = {r1, r2}; winner = max total = r2 (1400000, dur 700). +# Proves: (a) a later partial row cannot LOWER the session's figure +# (UAT-004), (b) a record with no `partial` key is treated as final and +# CAN be the winner (directive 2). +# +# dedup-inflate.jsonl (SPEC-202, one session "sess1", 2 execute rows) +# r1 total=700000 partial:false, dur 400 +# r2 total=1200000 partial:true (HIGHEST total, latest ended_at) +# non-partial pool = {r1}; winner = r1 (700000). Proves a partial row +# cannot INFLATE the session's figure even when it has the max total +# (the other half of UAT-004's success criterion). +# +# dedup-tiebreak.jsonl (SPEC-203, one session "sess1", 2 execute rows) +# r1 total=500000 ended_at 10:00 dur=100 (both non-partial) +# r2 total=500000 ended_at 11:00 dur=200 +# equal totals -> tiebreak on latest ended_at -> r2 wins. Elapsed 200s +# (3m20s) proves r2 won, not r1's 100s (1m40s). 
+# +# dedup-all-partial.jsonl (SPEC-204, one session "sess1", 2 execute rows, +# BOTH partial:true) -- extra coverage of the frozen algorithm's fallback +# branch (no non-partial row at all -> pool = all rows, session flagged +# partial). Winner = max total = 500000 (dur 250, 4m10s); partial marker +# must appear. +# +# cross-session.jsonl (SPEC-210, execute only, two sessions) +# sess-s1 (halted): r1 total=1000000 dur=600, r2 total=1500000 dur=1200 +# (last pre-halt row, no `partial` key on either row) -> dedup = r2. +# sess-s2 (resumed later): r3 total=2000000 dur=900, +# r4 total=2600000 dur=1500 (final merge) -> dedup = r4. +# execute total = 1500000 + 2600000 = 4100000 (UAT-003: sum across +# sessions, not S2 alone; UAT-005: halted session's last row included, +# no non-final/status field anywhere in the fixture). +# elapsed = 1200 + 1500 = 2700s = 45m0s (UAT-008: sum of each session's +# OWN active span). The WRONG naive calc, max(ended_at) - min(started_at) +# = 2026-04-01T14:25:00 - 2026-04-01T09:50:00 = 4h35m = 16500s, is a +# different number (2700 != 16500) -- a regression trap for that bug. +# No spec/plan rows exist for SPEC-210 at all, so this fixture doubles as +# the "missing action never errors" case: only execute + Total render. +# +# full-cycle.jsonl (SPEC-220: one spec, one plan, one execute row, plus one +# UNRELATED SPEC-999 execute row to prove spec_id filtering) +# spec buckets 1000/2000/30000/4000 total=37000 dur=600 (10m0s) +# plan buckets 1100/2100/31000/4100 total=38300 dur=700 (11m40s) +# execute buckets 1200/2200/32000/4200 total=39600 dur=800 (13m20s) +# grand total = 37000+38300+39600 = 114900; grand elapsed = +# 600+700+800 = 2100s = 35m0s; grand buckets = 3300/6300/93000/12300 +# (each sums to 114900, cross-checking the per-record bucket sums too). +# The noise row's total (39999996) must NOT appear anywhere in the output. 
+# +# spec-less.jsonl (feature key = a plan slug, not a SPEC-NNN id; plan + +# execute only, no spec row) +# plan total=370 dur=60 (1m0s); execute total=407 dur=90 (1m30s) +# grand total=777; elapsed=150s=2m30s; buckets 21/42/630/84 (sum 777). +# UAT-007: no spec line renders, exit 0, no error. +# +# corrupt.jsonl (SPEC-230: one spec row, one PLAIN-TEXT bad line, one plan +# row; no execute row) +# spec total=5000 dur=60 (1m0s); plan total=6000 dur=90 (1m30s) +# grand total = 11000; elapsed = 150s = 2m30s. UAT-010: the bad line is +# skipped, not fatal; the good rows still roll up exactly; a corrupt +# marker is appended; exit 0. +# +# unavailable-elapsed.jsonl (SPEC-250: one execute row, duration_available +# false / duration_seconds null, but a REAL total) +# total=8000, buckets 1000/2000/4000/1000 (sums to 8000). Both the +# execute line and the Total line must render "unavailable" for elapsed, +# never a fabricated "0s"; the buckets still show the real numbers; the +# partial marker is appended (elapsed unavailable is a lower-bound signal). + +setup() { + SCRIPT_DIR="$(cd "$(dirname "$BATS_TEST_FILENAME")/.." && pwd)" + SCRIPT="$SCRIPT_DIR/token-rollup.sh" + FIX="$(cd "$(dirname "$BATS_TEST_FILENAME")/fixtures/token-rollup" && pwd)" + + export GIT_AUTHOR_NAME="GAIA Test" + export GIT_AUTHOR_EMAIL="gaia-test@example.com" + export GIT_COMMITTER_NAME="GAIA Test" + export GIT_COMMITTER_EMAIL="gaia-test@example.com" +} + +# ---------- 1. UAT-004 (lower) + directive 2 (missing `partial` is final) ---------- +@test "dedup-lower: later partial row with a lower total cannot lower the winner; missing partial key can win" { + run bash "$SCRIPT" --spec-id SPEC-201 --ledger "$FIX/dedup-lower.jsonl" + [ "$status" -eq 0 ] + [[ "$output" == *"execute: 1400000 (elapsed 11m40s)"* ]] + [[ "$output" == *"Total: 1400000 (elapsed 11m40s)"* ]] + [[ "$output" != *"900000"* ]] +} + +# ---------- 2. 
UAT-004 (inflate direction) ---------- +@test "dedup-inflate: a partial row with the HIGHEST total cannot inflate the winner" { + run bash "$SCRIPT" --spec-id SPEC-202 --ledger "$FIX/dedup-inflate.jsonl" + [ "$status" -eq 0 ] + [[ "$output" == *"execute: 700000 (elapsed 6m40s)"* ]] + [[ "$output" == *"Total: 700000 (elapsed 6m40s)"* ]] + [[ "$output" != *"1200000"* ]] +} + +# ---------- 3. Tiebreak: equal totals, latest ended_at wins ---------- +@test "dedup-tiebreak: equal-total non-partial rows break on latest ended_at" { + run bash "$SCRIPT" --spec-id SPEC-203 --ledger "$FIX/dedup-tiebreak.jsonl" + [ "$status" -eq 0 ] + [[ "$output" == *"execute: 500000 (elapsed 3m20s)"* ]] + [[ "$output" != *"1m40s"* ]] +} + +# ---------- 4. All-partial session (fallback pool branch) ---------- +@test "dedup-all-partial: a session with only partial rows falls back to max-total and flags partial" { + run bash "$SCRIPT" --spec-id SPEC-204 --ledger "$FIX/dedup-all-partial.jsonl" + [ "$status" -eq 0 ] + [[ "$output" == *"execute: 500000 (elapsed 4m10s)"* ]] + [[ "$output" == *"(partial: some ledger input was unreadable or lacked timing"* ]] +} + +# ---------- 5. UAT-003 cross-session sum + UAT-005 halted-session inclusion ---------- +@test "cross-session: execute total sums deduped contributions from two sessions, including the halted one" { + run bash "$SCRIPT" --spec-id SPEC-210 --ledger "$FIX/cross-session.jsonl" + [ "$status" -eq 0 ] + [[ "$output" == *"execute: 4100000 (elapsed 45m0s)"* ]] + [[ "$output" == *"Total: 4100000 (elapsed 45m0s)"* ]] + # regression trap: must NOT be the naive idle-gap-inclusive span (UAT-008) + [[ "$output" != *"16500"* ]] + [[ "$output" != *"4h35m"* ]] +} + +# ---------- 6. 
Missing action never errors (reuses cross-session: execute-only feature) ---------- +@test "cross-session: no spec/plan rows for the feature -> only execute + Total render, no crash" { + run bash "$SCRIPT" --spec-id SPEC-210 --ledger "$FIX/cross-session.jsonl" + [ "$status" -eq 0 ] + [[ "$output" != *$'\n spec:'* ]] + [[ "$output" != *$'\n plan:'* ]] +} + +# ---------- 7. UAT-006 full render: spec + plan + execute + Total + buckets ---------- +@test "full-cycle: renders spec/plan/execute/Total with correct grand buckets, unrelated feature excluded" { + run bash "$SCRIPT" --spec-id SPEC-220 --ledger "$FIX/full-cycle.jsonl" + [ "$status" -eq 0 ] + [[ "$output" == *"spec: 37000 (elapsed 10m0s)"* ]] + [[ "$output" == *"plan: 38300 (elapsed 11m40s)"* ]] + [[ "$output" == *"execute: 39600 (elapsed 13m20s)"* ]] + [[ "$output" == *"Total: 114900 (elapsed 35m0s)"* ]] + [[ "$output" == *"Fresh input: 3300"* ]] + [[ "$output" == *"Cache write: 6300"* ]] + [[ "$output" == *"Cache read: 93000"* ]] + [[ "$output" == *"Output: 12300"* ]] + # the unrelated SPEC-999 noise row must never leak into this feature's roll-up + [[ "$output" != *"39999996"* ]] +} + +# ---------- 8. UAT-007 spec-less plan omits the spec line ---------- +@test "spec-less: plan-slug feature key with no spec row omits the spec line" { + run bash "$SCRIPT" --spec-id spec-less-slug-example --ledger "$FIX/spec-less.jsonl" + [ "$status" -eq 0 ] + [[ "$output" != *$'\n spec:'* ]] + [[ "$output" == *"plan: 370 (elapsed 1m0s)"* ]] + [[ "$output" == *"execute: 407 (elapsed 1m30s)"* ]] + [[ "$output" == *"Total: 777 (elapsed 2m30s)"* ]] +} + +# ---------- 9. 
UAT-010 corrupt line tolerated ---------- +@test "corrupt: one unparseable line among good rows is skipped, not fatal; good rows still sum" { + run bash "$SCRIPT" --spec-id SPEC-230 --ledger "$FIX/corrupt.jsonl" + [ "$status" -eq 0 ] + [[ "$output" == *"spec: 5000 (elapsed 1m0s)"* ]] + [[ "$output" == *"plan: 6000 (elapsed 1m30s)"* ]] + [[ "$output" == *"Total: 11000 (elapsed 2m30s)"* ]] + [[ "$output" == *"(partial: some ledger input was unreadable or lacked timing"* ]] +} + +# ---------- 10. Unknown feature key ---------- +@test "unknown feature key: no records line, exit 0, no crash" { + run bash "$SCRIPT" --spec-id NOPE-999 --ledger "$FIX/full-cycle.jsonl" + [ "$status" -eq 0 ] + [ "$output" = "Cycle cost (NOPE-999): no ledger records found." ] +} + +# ---------- 11. Missing ledger file ---------- +@test "missing ledger file: no records line, exit 0, no crash" { + run bash "$SCRIPT" --spec-id SPEC-999 --ledger "$FIX/does-not-exist.jsonl" + [ "$status" -eq 0 ] + [ "$output" = "Cycle cost (SPEC-999): no ledger records found." ] +} + +# ---------- 12. Missing --spec-id degrades gracefully ---------- +@test "missing --spec-id: exit 0, degrades to an empty readout, no crash" { + run --separate-stderr bash "$SCRIPT" --ledger "$FIX/full-cycle.jsonl" + [ "$status" -eq 0 ] + [ "$output" = "Cycle cost (): no ledger records found." ] +} + +# ---------- 13. 
UAT-009 default ledger resolves to the main checkout under a worktree ---------- +@test "no --ledger: resolves the main checkout's ledger from inside a linked worktree" { + MAIN="$(cd "$BATS_TEST_TMPDIR" && pwd -P)/main" + WT="$(cd "$BATS_TEST_TMPDIR" && pwd -P)/wt" + mkdir -p "$MAIN" + git -C "$MAIN" init -q + git -C "$MAIN" commit --allow-empty -q -m "init" + git -C "$MAIN" worktree add -q "$WT" -b "feature/kickoff" + + mkdir -p "$MAIN/.gaia/local/telemetry" + cp "$FIX/dedup-lower.jsonl" "$MAIN/.gaia/local/telemetry/tokens.jsonl" + + run bash -c "cd '$WT' && bash '$SCRIPT' --spec-id SPEC-201" + [ "$status" -eq 0 ] + # if the reader mis-resolved to the worktree (which has no ledger at all), + # this would read "no ledger records found" instead of the real total. + [[ "$output" == *"execute: 1400000 (elapsed 11m40s)"* ]] + + git -C "$MAIN" worktree remove --force "$WT" 2>/dev/null || rm -rf "$WT" +} + +# ---------- 14. Always exit 0 / diagnostics on stderr, not stdout ---------- +@test "diagnostics go to stderr, not stdout, even when input is corrupt or --spec-id is missing" { + run --separate-stderr bash "$SCRIPT" --ledger "$FIX/full-cycle.jsonl" + [ "$status" -eq 0 ] + [[ "$stderr" == *"token-rollup: missing --spec-id"* ]] + [[ "$output" != *"token-rollup:"* ]] + + run --separate-stderr bash "$SCRIPT" --spec-id SPEC-230 --ledger "$FIX/corrupt.jsonl" + [ "$status" -eq 0 ] + [[ "$output" != *"token-rollup:"* ]] +} + +# ---------- 15. 
Never-fabricate: unavailable elapsed renders "unavailable", never "0s" ---------- +@test "unavailable-elapsed: real totals render but elapsed shows 'unavailable', never a fabricated 0s" { + run bash "$SCRIPT" --spec-id SPEC-250 --ledger "$FIX/unavailable-elapsed.jsonl" + [ "$status" -eq 0 ] + [[ "$output" == *"execute: 8000 (elapsed unavailable)"* ]] + [[ "$output" == *"Total: 8000 (elapsed unavailable)"* ]] + [[ "$output" != *"elapsed 0s"* ]] + [[ "$output" == *"Fresh input: 1000"* ]] + [[ "$output" == *"Cache write: 2000"* ]] + [[ "$output" == *"Cache read: 4000"* ]] + [[ "$output" == *"Output: 1000"* ]] + [[ "$output" == *"(partial: some ledger input was unreadable or lacked timing"* ]] +} diff --git a/.gaia/scripts/token-rollup.sh b/.gaia/scripts/token-rollup.sh new file mode 100755 index 00000000..0be73112 --- /dev/null +++ b/.gaia/scripts/token-rollup.sh @@ -0,0 +1,244 @@ +#!/usr/bin/env bash +# GAIA token roll-up reader (SPEC-017). +# +# Reads the durable ledger token-tally.sh (SPEC-013) appends to +# (.gaia/local/telemetry/tokens.jsonl) and renders a full-cycle cost readout +# for one feature: spec / plan / execute token totals and elapsed spans, +# summed across every session the feature took (halted, resumed, worktree- +# split). It reads the ledger ONLY, never a transcript. +# +# Dedup (frozen, see the plan's README.md FC-1): within an action, group +# ledger rows by session_id; a session's winning row is the max-`.total` row +# among its NON-partial rows (a missing `partial` field counts as non-partial +# / final); only when EVERY row for that session is partial does the pool +# fall back to all of the session's rows (and the session is flagged +# partial). Ties break on the latest `.ended_at` (string compare, null -> +# ""). 
+# +# Grand elapsed is the SUM of every winning row's own duration_seconds (each +# session's own first-to-last-billed-turn span) — it deliberately excludes +# idle gaps between sessions, so it is NOT max(ended_at) - min(started_at) +# across all rows. +# +# CLI: +# bash .gaia/scripts/token-rollup.sh --spec-id <feature-key> [--ledger <path>] +# +# Behavior: +# - Exit code is ALWAYS 0. stdout carries ONLY the roll-up block; all +# diagnostics go to stderr. No number is ever fabricated; unreadable or +# partial input degrades to a lower-bound figure with a trailing marker +# line instead of aborting or guessing. +# +# DO NOT add `set -e`; every step degrades to a partial/empty readout rather +# than aborting. + +log() { + printf '%s\n' "$*" >&2 +} + +is_uint() { + case "$1" in + ''|*[!0-9]*) return 1 ;; + *) return 0 ;; + esac +} + +# Pinned human duration format, identical to token-tally.sh: <H>h<M>m<S>s, +# dropping any leading zero-valued unit. +human_duration() { + local total="$1" h m s + h=$(( total / 3600 )) + m=$(( (total % 3600) / 60 )) + s=$(( total % 60 )) + if (( h > 0 )); then + printf '%dh%dm%ds' "$h" "$m" "$s" + elif (( m > 0 )); then + printf '%dm%ds' "$m" "$s" + else + printf '%ds' "$s" + fi +} + +# ---------- argument parsing (never crash on a bad/missing flag) ---------- +FEATURE_KEY="" +LEDGER_OVERRIDE="" + +while [[ $# -gt 0 ]]; do + key="$1" + case "$key" in + --spec-id|--ledger) + val="${2:-}" + case "$key" in + --spec-id) FEATURE_KEY="$val" ;; + --ledger) LEDGER_OVERRIDE="$val" ;; + esac + # `shift 2` fails (and does NOT shift) when a flag is the final arg with + # no value, which would spin this loop forever; fall back to a single shift. 
+ shift 2 2>/dev/null || shift + ;; + *) + log "token-rollup: ignoring unknown argument: $key" + shift + ;; + esac +done + +[[ -z "$FEATURE_KEY" ]] && log "token-rollup: missing --spec-id" + +no_records() { + printf 'Cycle cost (%s): no ledger records found.\n' "$FEATURE_KEY" + exit 0 +} + +# ---------- ledger resolution, same as token-tally.sh's resolve_ledger ---------- +# main_root = dirname(absolute(git rev-parse --git-common-dir)), so a run +# inside a linked worktree reads the surviving main ledger, not a worktree +# copy that was never written. --ledger overrides (test seam). +resolve_ledger() { + if [[ -n "$LEDGER_OVERRIDE" ]]; then + printf '%s' "$LEDGER_OVERRIDE" + return 0 + fi + local common_dir abs main_root + common_dir="$(git rev-parse --git-common-dir 2>/dev/null)" + [[ -z "$common_dir" ]] && return 1 + case "$common_dir" in + /*) abs="$common_dir" ;; + *) abs="$PWD/$common_dir" ;; + esac + main_root="$(cd "$(dirname "$abs")" 2>/dev/null && pwd)" + [[ -z "$main_root" ]] && return 1 + printf '%s' "$main_root/.gaia/local/telemetry/tokens.jsonl" +} + +LEDGER="" +if ledger_path="$(resolve_ledger)" && [[ -n "$ledger_path" ]]; then + LEDGER="$ledger_path" +else + log "token-rollup: could not resolve ledger path" +fi + +[[ -z "$LEDGER" || ! -f "$LEDGER" ]] && no_records + +# ---------- corrupt-line-tolerant parse + feature filter ---------- +# A line that fails to parse as JSON is skipped and bumps `bad`; it never +# aborts the read, and a single bad line never drops the good ones (UAT-010). +corrupt=0 +parsed="$(jq -R -s --arg fk "$FEATURE_KEY" ' + split("\n") | map(select(length > 0)) + | map(try fromjson catch "__BAD__") + | { + bad: (map(select(. == "__BAD__")) | length), + recs: (map(select(. 
!= "__BAD__")) | map(select(.spec_id == $fk))) + } +' "$LEDGER" 2>/dev/null)" + +if [[ -z "$parsed" ]]; then + log "token-rollup: ledger unreadable: $LEDGER" + no_records +fi + +bad_count="$(jq -r '.bad' <<<"$parsed" 2>/dev/null)" +is_uint "$bad_count" || bad_count=0 +(( bad_count > 0 )) && corrupt=1 + +recs="$(jq -c '.recs' <<<"$parsed" 2>/dev/null)" +[[ -z "$recs" ]] && recs="[]" +recs_count="$(jq -r 'length' <<<"$recs" 2>/dev/null)" +is_uint "$recs_count" || recs_count=0 +(( recs_count == 0 )) && no_records + +# ---------- dedup + aggregate (frozen algorithm) ---------- +summary="$(jq -c ' + def winner_of($pool): + $pool | map(. + {_ended: (.ended_at // "")}) | sort_by([(.total // 0), ._ended]) | last; + + def dedup_session($sess): + ($sess | map(select(.partial != true))) as $nonpartial + | (if ($nonpartial | length) > 0 then $nonpartial else $sess end) as $pool + | { winner: winner_of($pool), session_partial: (($nonpartial | length) == 0) }; + + . as $recs + | ( ["spec", "plan", "execute"] + | map( + . as $action + | ($recs | map(select(.action == $action))) as $actRecs + | select(($actRecs | length) > 0) + | ($actRecs | group_by(.session_id) | map(dedup_session(.))) as $sr + | { + action: $action, + total: ([$sr[].winner.total] | add // 0), + elapsed: ([$sr[] | (if .winner.duration_available == true then (.winner.duration_seconds // 0) else 0 end)] | add // 0), + elapsed_available: ([$sr[].winner.duration_available] | any), + elapsed_partial: ([$sr[].winner.duration_available] | map(. 
!= true) | any), + session_partial: ([$sr[].session_partial] | any), + winners: [$sr[].winner] + } + ) + ) as $actions + | { + actions: $actions, + grand_total: ($actions | map(.total) | add // 0), + grand_elapsed: ($actions | map(.elapsed) | add // 0), + grand_elapsed_available: ($actions | map(.elapsed_available) | any), + grand_elapsed_partial: ($actions | map(.elapsed_partial) | any), + grand_session_partial: ($actions | map(.session_partial) | any), + buckets: { + fresh_input: ($actions | map(.winners[].buckets.fresh_input) | add // 0), + cache_write: ($actions | map(.winners[].buckets.cache_write) | add // 0), + cache_read: ($actions | map(.winners[].buckets.cache_read) | add // 0), + output: ($actions | map(.winners[].buckets.output) | add // 0) + } + } +' <<<"$recs" 2>/dev/null)" + +if [[ -z "$summary" ]]; then + log "token-rollup: aggregation failed" + no_records +fi + +actions_len="$(jq -r '.actions | length' <<<"$summary" 2>/dev/null)" +is_uint "$actions_len" || actions_len=0 +(( actions_len == 0 )) && no_records + +# ---------- render (stdout = payload only) ---------- +IFS=$'\t' read -r grand_total grand_elapsed grand_elapsed_available grand_elapsed_partial grand_session_partial fresh cwrite cread out < <( + jq -r '[.grand_total, .grand_elapsed, .grand_elapsed_available, .grand_elapsed_partial, .grand_session_partial, + .buckets.fresh_input, .buckets.cache_write, .buckets.cache_read, .buckets.output] | @tsv' <<<"$summary" +) +is_uint "$grand_total" || grand_total=0 +is_uint "$grand_elapsed" || grand_elapsed=0 +is_uint "$fresh" || fresh=0 +is_uint "$cwrite" || cwrite=0 +is_uint "$cread" || cread=0 +is_uint "$out" || out=0 + +printf 'Cycle cost (%s):\n' "$FEATURE_KEY" + +while IFS=$'\t' read -r a_action a_total a_elapsed a_avail; do + is_uint "$a_total" || a_total=0 + is_uint "$a_elapsed" || a_elapsed=0 + if [[ "$a_avail" == "true" ]]; then + a_elapsed_str="$(human_duration "$a_elapsed")" + else + a_elapsed_str="unavailable" + fi + printf ' %-11s%8d 
(elapsed %s)\n' "$a_action:" "$a_total" "$a_elapsed_str" +done < <(jq -r '.actions[] | [.action, .total, .elapsed, .elapsed_available] | @tsv' <<<"$summary") + +if [[ "$grand_elapsed_available" == "true" ]]; then + total_elapsed_str="$(human_duration "$grand_elapsed")" +else + total_elapsed_str="unavailable" +fi +printf ' %-11s%8d (elapsed %s)\n' "Total:" "$grand_total" "$total_elapsed_str" +printf ' Fresh input: %s\n' "$fresh" +printf ' Cache write: %s\n' "$cwrite" +printf ' Cache read: %s\n' "$cread" +printf ' Output: %s\n' "$out" + +if (( corrupt == 1 )) || [[ "$grand_elapsed_partial" == "true" ]] || [[ "$grand_session_partial" == "true" ]]; then + printf ' (partial: some ledger input was unreadable or lacked timing; figures are a lower bound)\n' +fi + +exit 0 diff --git a/.gaia/tests/hooks/token-tally-git-op.bats b/.gaia/tests/hooks/token-tally-git-op.bats new file mode 100644 index 00000000..5575db4f --- /dev/null +++ b/.gaia/tests/hooks/token-tally-git-op.bats @@ -0,0 +1,289 @@ +#!/usr/bin/env bats +# +# Bats suite for .claude/hooks/token-tally-git-op.sh (UAT-001/UAT-002/UAT-009) +# and its shared resolver lib .claude/hooks/lib/gaia-active-plan.sh. +# +# Every test runs the hook with cwd = a tmp git repo, never the real repo +# root: token-tally.sh's ledger resolution walks up from cwd via +# `git rev-parse --git-common-dir`, so running from the real repo would +# append test rows to the real .gaia/local/telemetry/tokens.jsonl. Each tmp +# repo gets its own copy of the built lib + the real token-tally.sh at their +# repo-relative paths (build_repo below), matching what a real checkout has. +# +# Session `fixturesession0001` against the anchor fixture +# (.gaia/scripts/tests/fixtures/token-tally/projects) is the same +# hand-computed oracle token-tally.bats uses: total 11110. + +setup() { + HELPERS="$BATS_TEST_DIRNAME/helpers" + REPO_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." 
&& pwd)" + HOOK_ABS="$REPO_ROOT/.claude/hooks/token-tally-git-op.sh" + LIB_SRC="$REPO_ROOT/.claude/hooks/lib/gaia-active-plan.sh" + TALLY_SRC="$REPO_ROOT/.gaia/scripts/token-tally.sh" + ANCHOR="$REPO_ROOT/.gaia/scripts/tests/fixtures/token-tally/projects" + SESSION="fixturesession0001" + + export GIT_AUTHOR_NAME="GAIA Test" + export GIT_AUTHOR_EMAIL="gaia-test@example.com" + export GIT_COMMITTER_NAME="GAIA Test" + export GIT_COMMITTER_EMAIL="gaia-test@example.com" +} + +teardown() { + [ -n "${REPO:-}" ] && rm -rf "$REPO" + [ -n "${WT:-}" ] && [ -d "$WT" ] && rm -rf "$WT" + [ -n "${MAIN:-}" ] && rm -rf "$MAIN" + return 0 +} + +# Scaffolds a tmp git repo with the built lib + the real token-tally.sh +# copied in at their repo-relative paths, preserving the executable bit. +# Sets $REPO. +build_repo() { + REPO="$("$HELPERS/tmp-git-repo.sh")" + mkdir -p "$REPO/.claude/hooks/lib" "$REPO/.gaia/scripts" + cp "$LIB_SRC" "$REPO/.claude/hooks/lib/gaia-active-plan.sh" + chmod +x "$REPO/.claude/hooks/lib/gaia-active-plan.sh" + cp "$TALLY_SRC" "$REPO/.gaia/scripts/token-tally.sh" + chmod +x "$REPO/.gaia/scripts/token-tally.sh" +} + +write_running() { + # write_running <plan_dir> <branch> <started> + mkdir -p "$1" + { printf 'branch: %s\n' "$2"; printf 'slug: %s\n' "$(basename "$1")"; printf 'started: %s\n' "$3"; } > "$1/RUNNING" +} + +write_readme_with_spec() { + # write_readme_with_spec <plan_dir> <spec_path> + mkdir -p "$1" + { + printf '# Plan\n\n' + printf '## Source SPEC\n\n' + printf 'Derived from %s (%s).\n' "$(basename "$(dirname "$2")")" "$2" + } > "$1/README.md" +} + +write_readme_spec_less() { + mkdir -p "$1" + printf '# Plan\n\nNo source spec here.\n' > "$1/README.md" +} + +run_hook() { + # run_hook <command> [projects_root] + local cmd="$1" proot="${2:-$ANCHOR}" + local input + input=$("$HELPERS/mock-hook-input.sh" pre-tool-use "$SESSION" Bash "$cmd") + run env GAIA_TALLY_PROJECTS_ROOT="$proot" bash -c "echo '$input' | '$HOOK_ABS'" +} + +# ---------- 1. 
Git commit with active plan folder -> keyed execute record (UAT-001) ---------- +@test "git commit with active plan folder records a keyed execute record" { + build_repo + cd "$REPO" + branch="$(git branch --show-current)" + plan_dir="$REPO/.gaia/local/plans/my-plan" + write_readme_with_spec "$plan_dir" "/abs/root/.gaia/local/specs/SPEC-013/SPEC.md" + write_running "$plan_dir" "$branch" "2026-07-01T00:00:00Z" + + run_hook "git commit -m x" + [ "$status" -eq 0 ] + [ -z "$output" ] + + LEDGER="$REPO/.gaia/local/telemetry/tokens.jsonl" + [ -f "$LEDGER" ] + [ "$(jq -r '.action' "$LEDGER")" = "execute" ] + [ "$(jq -r '.spec_id' "$LEDGER")" = "SPEC-013" ] + [ "$(jq -r '.plan_slug' "$LEDGER")" = "my-plan" ] + [ "$(jq -r '.total' "$LEDGER")" -eq 11110 ] + [ "$(jq -r '.partial' "$LEDGER")" = "false" ] + [ "$(jq -r '.session_id' "$LEDGER")" = "$SESSION" ] +} + +# ---------- 2. git push also records ---------- +@test "git push also records an execute row" { + build_repo + cd "$REPO" + branch="$(git branch --show-current)" + plan_dir="$REPO/.gaia/local/plans/my-plan" + write_readme_with_spec "$plan_dir" "/abs/root/.gaia/local/specs/SPEC-013/SPEC.md" + write_running "$plan_dir" "$branch" "2026-07-01T00:00:00Z" + + run_hook "git push" + [ "$status" -eq 0 ] + + LEDGER="$REPO/.gaia/local/telemetry/tokens.jsonl" + [ -f "$LEDGER" ] + [ "$(jq -r '.action' "$LEDGER")" = "execute" ] +} + +# ---------- 3. Negative gate: no plan folder -> no record (UAT-002) ---------- +@test "no plan folder at all: no record written" { + build_repo + cd "$REPO" + + run_hook "git commit -m x" + [ "$status" -eq 0 ] + [ ! -f "$REPO/.gaia/local/telemetry/tokens.jsonl" ] +} + +# ---------- 4. 
Negative gate: plan folder exists but branch does not match ---------- +@test "plan folder exists but no RUNNING matches the branch: no record" { + build_repo + cd "$REPO" + plan_dir="$REPO/.gaia/local/plans/other-plan" + write_readme_with_spec "$plan_dir" "/abs/root/.gaia/local/specs/SPEC-099/SPEC.md" + write_running "$plan_dir" "some-other-branch" "2026-07-01T00:00:00Z" + + run_hook "git commit -m x" + [ "$status" -eq 0 ] + [ ! -f "$REPO/.gaia/local/telemetry/tokens.jsonl" ] +} + +# ---------- 5. Non-git command / git status: no record, no transcript parse ---------- +@test "non-git command: no record" { + build_repo + cd "$REPO" + plan_dir="$REPO/.gaia/local/plans/my-plan" + write_readme_with_spec "$plan_dir" "/abs/root/.gaia/local/specs/SPEC-013/SPEC.md" + write_running "$plan_dir" "$(git branch --show-current)" "2026-07-01T00:00:00Z" + + run_hook "ls -la" + [ "$status" -eq 0 ] + [ ! -f "$REPO/.gaia/local/telemetry/tokens.jsonl" ] +} + +@test "git status: no record (commit/push-only matching)" { + build_repo + cd "$REPO" + plan_dir="$REPO/.gaia/local/plans/my-plan" + write_readme_with_spec "$plan_dir" "/abs/root/.gaia/local/specs/SPEC-013/SPEC.md" + write_running "$plan_dir" "$(git branch --show-current)" "2026-07-01T00:00:00Z" + + run_hook "git status" + [ "$status" -eq 0 ] + [ ! -f "$REPO/.gaia/local/telemetry/tokens.jsonl" ] +} + +# ---------- 6. 
Feature-key resolution matches step 4.8 ---------- +@test "feature key resolves via basename(dirname(SPEC path))" { + build_repo + cd "$REPO" + plan_dir="$REPO/.gaia/local/plans/my-plan" + write_readme_with_spec "$plan_dir" "/abs/root/.gaia/local/specs/SPEC-042/SPEC.md" + write_running "$plan_dir" "$(git branch --show-current)" "2026-07-01T00:00:00Z" + + run_hook "git commit -m x" + [ "$status" -eq 0 ] + [ "$(jq -r '.spec_id' "$REPO/.gaia/local/telemetry/tokens.jsonl")" = "SPEC-042" ] +} + +@test "spec-less plan README: feature key falls back to the plan slug" { + build_repo + cd "$REPO" + plan_dir="$REPO/.gaia/local/plans/spec-less-slug" + write_readme_spec_less "$plan_dir" + write_running "$plan_dir" "$(git branch --show-current)" "2026-07-01T00:00:00Z" + + run_hook "git commit -m x" + [ "$status" -eq 0 ] + [ "$(jq -r '.spec_id' "$REPO/.gaia/local/telemetry/tokens.jsonl")" = "spec-less-slug" ] +} + +# ---------- 7. Disambiguation by latest started ---------- +@test "two matching plan folders disambiguate on the latest started timestamp" { + build_repo + cd "$REPO" + branch="$(git branch --show-current)" + + old_dir="$REPO/.gaia/local/plans/old-plan" + write_readme_with_spec "$old_dir" "/abs/root/.gaia/local/specs/SPEC-001/SPEC.md" + write_running "$old_dir" "$branch" "2026-07-01T00:00:00Z" + + new_dir="$REPO/.gaia/local/plans/new-plan" + write_readme_with_spec "$new_dir" "/abs/root/.gaia/local/specs/SPEC-002/SPEC.md" + write_running "$new_dir" "$branch" "2026-07-02T00:00:00Z" + + run_hook "git commit -m x" + [ "$status" -eq 0 ] + LEDGER="$REPO/.gaia/local/telemetry/tokens.jsonl" + [ "$(jq -r '.spec_id' "$LEDGER")" = "SPEC-002" ] + [ "$(jq -r '.plan_slug' "$LEDGER")" = "new-plan" ] +} + +# ---------- 8. 
Heredoc / commit-message false-match guard ---------- +@test "git commit mentioned inside a quoted string is not matched" { + build_repo + cd "$REPO" + plan_dir="$REPO/.gaia/local/plans/my-plan" + write_readme_with_spec "$plan_dir" "/abs/root/.gaia/local/specs/SPEC-013/SPEC.md" + write_running "$plan_dir" "$(git branch --show-current)" "2026-07-01T00:00:00Z" + + run_hook 'echo "remember to git commit later"' + [ "$status" -eq 0 ] + [ ! -f "$REPO/.gaia/local/telemetry/tokens.jsonl" ] +} + +@test "git commit mentioned in heredoc body prose is not matched" { + build_repo + cd "$REPO" + plan_dir="$REPO/.gaia/local/plans/my-plan" + write_readme_with_spec "$plan_dir" "/abs/root/.gaia/local/specs/SPEC-013/SPEC.md" + write_running "$plan_dir" "$(git branch --show-current)" "2026-07-01T00:00:00Z" + + heredoc_cmd=$'cat </dev/null || rm -rf "$WT" + [ -f "$MAIN_LEDGER" ] +} From aa85266072a9f3168691e0ef12c1190575bd6b83 Mon Sep 17 00:00:00 2001 From: Steven Sacks Date: Fri, 3 Jul 2026 21:54:40 +0900 Subject: [PATCH 2/7] feat(gaia): SPEC-017 phase 2 - merge-time roll-up hook + hook wiring Add .claude/hooks/token-rollup-merge.sh (PostToolUse Bash): on gh pr merge, resolve the feature key from the active plan folder (or the ledger's most recent execute row as a labeled fallback) and render the full-cycle spec/plan/execute/total roll-up into the merging session. Register both new hooks in .claude/settings.json (token-tally-git-op.sh under PreToolUse, token-rollup-merge.sh under PostToolUse) and replace plan.md's manual execute-tally instruction with the automatic git-op recording plus roll-up reporting, so no phase is double-counted. Also drop a stray em dash from token-rollup.sh's header comment. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .claude/hooks/token-rollup-merge.sh | 92 ++++++++ .claude/settings.json | 9 + .claude/skills/gaia/references/plan.md | 15 +- .gaia/scripts/token-rollup.sh | 2 +- .gaia/tests/hooks/token-rollup-merge.bats | 262 ++++++++++++++++++++++ 5 files changed, 371 insertions(+), 9 deletions(-) create mode 100755 .claude/hooks/token-rollup-merge.sh create mode 100644 .gaia/tests/hooks/token-rollup-merge.bats diff --git a/.claude/hooks/token-rollup-merge.sh b/.claude/hooks/token-rollup-merge.sh new file mode 100755 index 00000000..01ad6f2b --- /dev/null +++ b/.claude/hooks/token-rollup-merge.sh @@ -0,0 +1,92 @@ +#!/usr/bin/env bash +# PostToolUse Bash hook on `gh pr merge`. Renders the full-cycle token-cost +# roll-up (spec / plan / execute / total) for the merging feature into the +# session's context. Session-independent: resolves the feature key from +# on-disk state only, the active plan folder or, failing that, the ledger's +# most recent execute record, so it renders from any session that runs the +# merge, including a fresh top-level session that never ran the plan itself. + +set -euo pipefail +trap 'exit 0' ERR + +command -v jq >/dev/null 2>&1 || exit 0 + +payload=$(cat) +tool_name=$(jq -r '.tool_name // ""' <<<"$payload") +[ "$tool_name" = "Bash" ] || exit 0 + +cmd=$(jq -r '.tool_input.command // ""' <<<"$payload") + +# Match `gh pr merge` as a real shell invocation, at command start or right +# after a shell separator (&&, ;, ||, |, newline), never inside a heredoc +# body or a quoted string (e.g. a commit message mentioning it in prose). +# Mirrors pr-merge-audit-check.sh's command match. 
+start_re='^[[:space:]]*gh[[:space:]]+pr[[:space:]]+merge([[:space:]]|$)' +sep_re=$'(\\&\\&|;|\\|\\||\\||\n)[[:space:]]*gh[[:space:]]+pr[[:space:]]+merge([[:space:]]|$)' +if [[ "$cmd" =~ $start_re ]]; then + : +elif [[ "$cmd" =~ $sep_re ]]; then + : +else + exit 0 +fi + +feature_key="" +fallback=0 + +# Primary: the active plan folder for this branch, keyed the same way the +# plan's own execute records are (the RUNNING sentinel + README Source SPEC). +# Present at merge time because the plan's self-cleanup runs only after the +# merge is confirmed, so this resolves correctly for the normal in-session +# merge. +if [ -f .claude/hooks/lib/gaia-active-plan.sh ]; then + . .claude/hooks/lib/gaia-active-plan.sh + plan_dir="$(resolve_active_plan_dir)" || true + if [ -n "$plan_dir" ]; then + feature_key="$(resolve_feature_key "$plan_dir")" || true + fi +fi + +# Fallback: best-effort, for a fresh session with no active plan folder in +# view (e.g. a worktree-continuation merge). Keys to the most-recent execute +# record in the ledger, resolved the same way token-tally.sh / token-rollup.sh +# resolve it (the main checkout, even when run from a linked worktree). This +# is not guaranteed to be the merging feature (an interleaved prior feature's +# execute row could be newer), so it is labeled at render time. 
+if [ -z "$feature_key" ]; then + common_dir=$(git rev-parse --git-common-dir 2>/dev/null || true) + if [ -n "$common_dir" ]; then + case "$common_dir" in + /*) abs="$common_dir" ;; + *) abs="$PWD/$common_dir" ;; + esac + main_root=$(cd "$(dirname "$abs")" 2>/dev/null && pwd || true) + if [ -n "$main_root" ]; then + ledger="$main_root/.gaia/local/telemetry/tokens.jsonl" + if [ -f "$ledger" ]; then + feature_key=$(jq -R -s -r ' + split("\n") | map(select(length > 0)) + | map(try fromjson catch empty) + | map(select(type == "object" and .action == "execute" and (.spec_id // "") != "")) + | sort_by(.ts // "") + | last + | .spec_id // empty + ' "$ledger" 2>/dev/null || true) + [ -n "$feature_key" ] && fallback=1 + fi + fi + fi +fi + +[ -n "$feature_key" ] || exit 0 + +rollup=$(bash .gaia/scripts/token-rollup.sh --spec-id "$feature_key" 2>/dev/null || true) +[ -n "$rollup" ] || exit 0 + +if [ "$fallback" -eq 1 ]; then + printf '[cycle cost at merge - feature key resolved from the ledger'"'"'s most recent execution; no active plan folder was found]\n%s\n' "$rollup" +else + printf '[cycle cost at merge]\n%s\n' "$rollup" +fi + +exit 0 diff --git a/.claude/settings.json b/.claude/settings.json index 9edc2d6f..752b7278 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -143,6 +143,11 @@ "command": ".claude/hooks/pr-merge-audit-check.sh", "statusMessage": "Checking PR-merge audit gate…" }, + { + "type": "command", + "command": ".claude/hooks/token-tally-git-op.sh", + "statusMessage": "Recording execute-phase token tally…" + }, { "type": "command", "command": ".claude/hooks/worthiness-presence-check.sh", @@ -179,6 +184,10 @@ "type": "command", "command": ".claude/hooks/wiki-commit-nudge.sh" }, + { + "type": "command", + "command": ".claude/hooks/token-rollup-merge.sh" + }, { "type": "command", "command": ".claude/hooks/capture-red-observations.sh" diff --git a/.claude/skills/gaia/references/plan.md b/.claude/skills/gaia/references/plan.md index 
e25475b4..e0b63b6a 100644 --- a/.claude/skills/gaia/references/plan.md +++ b/.claude/skills/gaia/references/plan.md @@ -190,19 +190,18 @@ Then write the following files directly to `{PLAN_DIR}/`: - **Stop conditions.** On any sub-agent failure or quality-gate failure: STOP and surface to the user. Do not "fix and continue", do not commit, do not push. Before stopping, append the failure context (which phase, which sub-agent, error) to `SUMMARY.md` under a `## Phase N, (HALTED)` block so the user and any follow-up session see the same record. - **Final summary.** After all implementation phases pass and the final commit is pushed, before awaiting merge confirmation, **read `{PLAN_DIR}/SUMMARY.md`** and print a brief summary to the user: phases completed, sub-agents run, files touched (count), commits pushed (count + short SHAs), PR URL, quality-gate status, and the highest-signal findings/deviations/follow-ups drawn from `SUMMARY.md` so nothing is lost to context compression. Keep it tight, a few lines plus the surfaced notes, not a recap of every change. - **Token tally (execute-time).** After the pre-merge `code-review-audit`'s clean-pass marker is written and before the Final self-cleanup phase deletes the plan folder, the orchestrator runs the token tally for this KICKOFF execution and reports the printed four-bucket total and wall-clock elapsed to the user, so the run's dominant sub-agent fan-outs (including the pre-merge audit) are all counted, and the `--out-dir` still exists (the ledger itself lives in the main checkout and survives cleanup). 
Substitute the plan's real SPEC id (from the `## Source SPEC` section of `README.md`, or the plan slug if the plan has no SPEC), the real plan slug, and the absolute plan directory: + **Token tally (execute-time).** Execute-phase token tallies are recorded automatically: a `PreToolUse` hook on the orchestrator's per-phase git commit/push records this session's execute tally to the durable ledger, keyed to the feature (the SPEC id resolved from the active plan folder, or the plan slug when spec-less). Resumed, halted, and worktree sessions are all captured. The orchestrator does not run a manual execute tally; doing so would double-count the phase. + + After the pre-merge `code-review-audit`'s clean-pass marker is written and before the Final self-cleanup phase deletes the plan folder, the orchestrator reports the full-cycle cost by running the roll-up reader and surfacing its spec / plan / execute / total breakdown plus wall-clock elapsed to the user. Substitute the plan's real SPEC id (from the `## Source SPEC` section of `README.md`, or the plan slug if the plan has no SPEC): ```bash - if [ -x .gaia/scripts/token-tally.sh ]; then - bash .gaia/scripts/token-tally.sh \ - --action execute \ - --spec-id "<SPEC-NNN from README's Source SPEC, or the plan slug if none>" \ - --plan-slug "<plan slug = basename of the plan dir>" \ - --out-dir "<absolute plan dir>" || true + if [ -x .gaia/scripts/token-rollup.sh ]; then + bash .gaia/scripts/token-rollup.sh \ + --spec-id "<SPEC-NNN from README's Source SPEC, or the plan slug if none>" || true fi ``` - This attributes the whole execution session (main transcript plus every phase sub-agent sidecar, deduped to ground truth) to the plan, appends a durable ledger record keyed to the plan, and reports the tally to the user. It never blocks: the `-x` guard and trailing `|| true` mean a missing or failing helper degrades silently. 
Because the tally runs after the audit's sub-agent fan-out and counts every sidecar, the reported total is at least the sidecar-only sum. + A `PostToolUse` hook on `gh pr merge` renders the same roll-up at the merge boundary, so the readout also appears when the merge runs from a fresh top-level session. The reader never blocks and never fabricates a number: the `-x` guard and trailing `|| true` mean a missing or failing helper degrades silently, and an unreadable ledger degrades to a partial or absent figure with a marker. - **Final self-cleanup phase (last step before merge).** After all implementation phases pass and the user has reviewed the PR and confirmed it is ready to merge, the orchestrator deletes its own plan folder so scaffolding does not persist locally. Delete it by its literal repo-relative path: `rm -rf .gaia/local/plans/<slug>` (substitute the plan's slug). The literal path matches the project's `rm -rf .gaia/local/plans/*` permission and the `block-rm-rf.sh` whitelist, so it clears without a prompt; do not reconstruct an absolute path from variables (`"$ROOT/$PLAN_REL"`), which both misses that permission match and trips the empty-variable rm guard. This removes `SUMMARY.md` along with everything else, by this point its content has already been surfaced in the Final summary. Then check `git check-ignore .gaia/local/plans/`, if it is gitignored (the GAIA default), the deletion is invisible to git: skip the commit and report "plan folder removed locally; gitignored, no commit needed." If the path is tracked, commit and push the deletion as the final commit on the PR. If the user explicitly asks to keep the plan folder for archival, the orchestrator skips the deletion and reports. 
- **Post-merge worktree cleanup (worktree-mode runs only).** When the orchestrator's pre-flight chose worktree mode (or the run was dispatched into a worktree by upstream tooling), the post-merge phase runs the cleanup procedure below AFTER the user confirms the PR is merged. The procedure detects the squash-merge state and discards the worktree without prompting (the SPEC clarifications.answered confirms pre-consent: the orchestrator told the user "after merge, the worktree will be discarded" before opening the PR; the user merging the PR is the consent). 1. Confirm merge via `gh pr view <N> --json state`. Parse the JSON; require `.state == "MERGED"`. If not merged, do NOT proceed, surface to user and stop. diff --git a/.gaia/scripts/token-rollup.sh b/.gaia/scripts/token-rollup.sh index 0be73112..fe9f4078 100755 --- a/.gaia/scripts/token-rollup.sh +++ b/.gaia/scripts/token-rollup.sh @@ -16,7 +16,7 @@ # ""). # # Grand elapsed is the SUM of every winning row's own duration_seconds (each -# session's own first-to-last-billed-turn span) — it deliberately excludes +# session's own first-to-last-billed-turn span); it deliberately excludes # idle gaps between sessions, so it is NOT max(ended_at) - min(started_at) # across all rows. # diff --git a/.gaia/tests/hooks/token-rollup-merge.bats b/.gaia/tests/hooks/token-rollup-merge.bats new file mode 100644 index 00000000..cca94e04 --- /dev/null +++ b/.gaia/tests/hooks/token-rollup-merge.bats @@ -0,0 +1,262 @@ +#!/usr/bin/env bats +# +# Bats suite for .claude/hooks/token-rollup-merge.sh (UAT-006/007/010, directive 5). +# +# Every test runs the hook with cwd = a tmp git repo, never the real repo +# root: the hook sources gaia-active-plan.sh and shells out to +# token-rollup.sh via repo-relative paths, and the reader resolves the ledger +# via `git rev-parse --git-common-dir`. Running from the real repo would read +# the live .gaia/local/telemetry/tokens.jsonl. 
Each tmp repo gets its own copy +# of the built lib + the real token-rollup.sh at their repo-relative paths +# (build_repo below), matching what a real checkout has. + +setup() { + HELPERS="$BATS_TEST_DIRNAME/helpers" + REPO_ROOT="$(cd "$BATS_TEST_DIRNAME/../../.." && pwd)" + HOOK_ABS="$REPO_ROOT/.claude/hooks/token-rollup-merge.sh" + LIB_SRC="$REPO_ROOT/.claude/hooks/lib/gaia-active-plan.sh" + ROLLUP_SRC="$REPO_ROOT/.gaia/scripts/token-rollup.sh" + + export GIT_AUTHOR_NAME="GAIA Test" + export GIT_AUTHOR_EMAIL="gaia-test@example.com" + export GIT_COMMITTER_NAME="GAIA Test" + export GIT_COMMITTER_EMAIL="gaia-test@example.com" +} + +teardown() { + [ -n "${REPO:-}" ] && rm -rf "$REPO" + return 0 +} + +# Scaffolds a tmp git repo with the built lib + the real token-rollup.sh +# copied in at their repo-relative paths, preserving the executable bit. +# Sets $REPO. +build_repo() { + REPO="$("$HELPERS/tmp-git-repo.sh")" + mkdir -p "$REPO/.claude/hooks/lib" "$REPO/.gaia/scripts" + cp "$LIB_SRC" "$REPO/.claude/hooks/lib/gaia-active-plan.sh" + chmod +x "$REPO/.claude/hooks/lib/gaia-active-plan.sh" + cp "$ROLLUP_SRC" "$REPO/.gaia/scripts/token-rollup.sh" + chmod +x "$REPO/.gaia/scripts/token-rollup.sh" +} + +write_running() { + # write_running <plan_dir> <branch> <started> + mkdir -p "$1" + { printf 'branch: %s\n' "$2"; printf 'slug: %s\n' "$(basename "$1")"; printf 'started: %s\n' "$3"; } > "$1/RUNNING" +} + +write_readme_with_spec() { + # write_readme_with_spec <plan_dir> <spec_path> + mkdir -p "$1" + { + printf '# Plan\n\n' + printf '## Source SPEC\n\n' + printf 'Derived from %s (%s).\n' "$(basename "$(dirname "$2")")" "$2" + } > "$1/README.md" +} + +write_readme_spec_less() { + mkdir -p "$1" + printf '# Plan\n\nNo source spec here.\n' > "$1/README.md" +} + +ledger_path() { + printf '%s/.gaia/local/telemetry/tokens.jsonl' "$REPO" +} + +# write_record <action> <spec_id> <session_id> <total> <ts> [<ended_at>] +write_record() { + local action="$1" spec_id="$2" sid="$3" 
total="$4" ts="$5" ended="${6:-$ts}" + mkdir -p "$(dirname "$(ledger_path)")" + jq -nc --arg action "$action" --arg spec_id "$spec_id" --arg sid "$sid" \ + --argjson total "$total" --arg ts "$ts" --arg ended "$ended" \ + '{action:$action, spec_id:$spec_id, plan_slug:"my-plan", session_id:$sid, + buckets:{fresh_input:$total, cache_write:0, cache_read:0, output:0}, + total:$total, partial:false, started_at:$ended, ended_at:$ended, + duration_seconds:10, duration_available:true, ts:$ts}' >> "$(ledger_path)" +} + +run_hook() { + # run_hook <command> + local cmd="$1" input + input=$("$HELPERS/mock-hook-input.sh" post-tool-use S1 Bash "$cmd") + run bash -c "echo '$input' | '$HOOK_ABS'" +} + +# ---------- 1. Renders spec+plan+execute+Total at merge (UAT-006) ---------- +@test "renders the roll-up at merge with spec+plan+execute" { + build_repo + cd "$REPO" + branch="$(git branch --show-current)" + plan_dir="$REPO/.gaia/local/plans/my-plan" + write_readme_with_spec "$plan_dir" "/abs/root/.gaia/local/specs/SPEC-042/SPEC.md" + write_running "$plan_dir" "$branch" "2026-07-01T00:00:00Z" + + write_record spec SPEC-042 sess-spec 100 "2026-06-01T00:00:00Z" + write_record plan SPEC-042 sess-plan 200 "2026-06-02T00:00:00Z" + write_record execute SPEC-042 sess-exec 300 "2026-06-03T00:00:00Z" + + run_hook "gh pr merge 7 --squash" + [ "$status" -eq 0 ] + [[ "$output" == *"[cycle cost at merge]"* ]] + [[ "$output" == *"Cycle cost (SPEC-042)"* ]] + [[ "$output" == *"spec:"* ]] + [[ "$output" == *"plan:"* ]] + [[ "$output" == *"execute:"* ]] + [[ "$output" == *"Total:"* ]] + [[ "$output" == *"600"* ]] +} + +# ---------- 2. 
Spec-less plan omits the spec line (UAT-007) ---------- +@test "spec-less plan omits the spec line" { + build_repo + cd "$REPO" + branch="$(git branch --show-current)" + plan_dir="$REPO/.gaia/local/plans/spec-less-plan" + write_readme_spec_less "$plan_dir" + write_running "$plan_dir" "$branch" "2026-07-01T00:00:00Z" + + write_record plan spec-less-plan sess-plan 150 "2026-06-02T00:00:00Z" + write_record execute spec-less-plan sess-exec 250 "2026-06-03T00:00:00Z" + + run_hook "gh pr merge" + [ "$status" -eq 0 ] + [[ "$output" == *"[cycle cost at merge]"* ]] + [[ "$output" != *"spec:"* ]] + [[ "$output" == *"plan:"* ]] + [[ "$output" == *"execute:"* ]] + [[ "$output" == *"Total:"* ]] + [[ "$output" == *"400"* ]] +} + +# ---------- 3. Fresh session, no plan folder -> ledger fallback (directive 5) ---------- +@test "fresh session with no active plan folder falls back to the ledger and labels itself" { + build_repo + cd "$REPO" + # No plan folder at all: this is the fresh-top-level-session case. + write_record execute SPEC-042 sess-exec 300 "2026-06-03T00:00:00Z" + + run_hook "gh pr merge" + [ "$status" -eq 0 ] + [[ "$output" == *"resolved from the ledger"* ]] + [[ "$output" == *"Cycle cost (SPEC-042)"* ]] + [[ "$output" == *"execute:"* ]] + [[ "$output" == *"300"* ]] +} + +# ---------- 4. Fallback picks the most-recent execute feature ---------- +@test "fallback picks the execute record with the latest ts" { + build_repo + cd "$REPO" + write_record execute SPEC-001 sess-a 100 "2026-06-01T00:00:00Z" + write_record execute SPEC-002 sess-b 200 "2026-06-05T00:00:00Z" + + run_hook "gh pr merge" + [ "$status" -eq 0 ] + [[ "$output" == *"Cycle cost (SPEC-002)"* ]] + [[ "$output" != *"Cycle cost (SPEC-001)"* ]] +} + +# ---------- 5. 
Primary resolver wins over a newer unrelated execute row ---------- +@test "active plan folder wins over a newer unrelated feature's execute row" { + build_repo + cd "$REPO" + branch="$(git branch --show-current)" + plan_dir="$REPO/.gaia/local/plans/my-plan" + write_readme_with_spec "$plan_dir" "/abs/root/.gaia/local/specs/SPEC-042/SPEC.md" + write_running "$plan_dir" "$branch" "2026-07-01T00:00:00Z" + + # SPEC-042's own (older) execute record. + write_record execute SPEC-042 sess-a 300 "2026-06-01T00:00:00Z" + # A globally newer execute row for an unrelated, interleaved feature. + write_record execute SPEC-999 sess-b 999 "2026-06-09T00:00:00Z" + + run_hook "gh pr merge" + [ "$status" -eq 0 ] + [[ "$output" == *"Cycle cost (SPEC-042)"* ]] + [[ "$output" != *"Cycle cost (SPEC-999)"* ]] + [[ "$output" != *"resolved from the ledger"* ]] +} + +# ---------- 6. Non-merge command: silent ---------- +@test "non-merge git command: silent" { + build_repo + cd "$REPO" + run_hook "git commit -m x" + [ "$status" -eq 0 ] + [ -z "$output" ] +} + +@test "gh pr view is not a merge: silent" { + build_repo + cd "$REPO" + run_hook "gh pr view 7" + [ "$status" -eq 0 ] + [ -z "$output" ] +} + +# ---------- 7. Corrupt / missing ledger never blocks (UAT-010) ---------- +@test "corrupt ledger line does not block; the good execute record still renders" { + build_repo + cd "$REPO" + write_record execute SPEC-042 sess-a 300 "2026-06-01T00:00:00Z" + echo 'not-json-garbage' >> "$(ledger_path)" + + run_hook "gh pr merge" + [ "$status" -eq 0 ] + [[ "$output" == *"Cycle cost (SPEC-042)"* ]] + [[ "$output" == *"execute:"* ]] +} + +@test "no active plan folder and no ledger at all: exit 0, empty stdout" { + build_repo + cd "$REPO" + # No plan folder, no ledger file: nothing to resolve a feature key from. + run_hook "gh pr merge" + [ "$status" -eq 0 ] + [ -z "$output" ] +} + +# ---------- 8. 
Heredoc / quoted-string false-match guard ---------- +@test "gh pr merge mentioned only inside heredoc body prose: not matched" { + build_repo + cd "$REPO" + branch="$(git branch --show-current)" + plan_dir="$REPO/.gaia/local/plans/my-plan" + write_readme_with_spec "$plan_dir" "/abs/root/.gaia/local/specs/SPEC-042/SPEC.md" + write_running "$plan_dir" "$branch" "2026-07-01T00:00:00Z" + write_record execute SPEC-042 sess-a 300 "2026-06-01T00:00:00Z" + + heredoc_cmd=$'cat <<EOF\nPlease remember to gh pr merge later.\nEOF' + run_hook "$heredoc_cmd" + [ "$status" -eq 0 ] + [ -z "$output" ] +} + +@test "gh pr merge mentioned inside a quoted string: not matched" { + build_repo + cd "$REPO" + run_hook 'echo "remember to gh pr merge later"' + [ "$status" -eq 0 ] + [ -z "$output" ] +} + +# ---------- 9. Renders regardless of the merge subprocess's own exit ---------- +@test "renders even when tool_response reports a failed merge" { + build_repo + cd "$REPO" + branch="$(git branch --show-current)" + plan_dir="$REPO/.gaia/local/plans/my-plan" + write_readme_with_spec "$plan_dir" "/abs/root/.gaia/local/specs/SPEC-042/SPEC.md" + write_running "$plan_dir" "$branch" "2026-07-01T00:00:00Z" + write_record execute SPEC-042 sess-a 300 "2026-06-01T00:00:00Z" + + input=$(jq -n --arg sid "S1" --arg cmd "gh pr merge 7 --squash" \ + '{session_id:$sid, transcript_path:"/tmp/t.jsonl", cwd:".", hook_event_name:"PostToolUse", + tool_name:"Bash", tool_input:{command:$cmd}, + tool_response:{stdout:"", stderr:"merge failed", exit_code:1, interrupted:false}}') + run bash -c "echo '$input' | '$HOOK_ABS'" + [ "$status" -eq 0 ] + [[ "$output" == *"Cycle cost (SPEC-042)"* ]] +} From 0d804ac763516100a8e93f078ec9dd74add2ac07 Mon Sep 17 00:00:00 2001 From: Steven Sacks <stevensacks@gmail.com> Date: Fri, 3 Jul 2026 22:21:07 +0900 Subject: [PATCH 3/7] feat(gaia): comma-group and right-align the cycle-cost readout Format token-rollup.sh's rendered action totals, grand total, and billing buckets with 
thousands separators, right-aligned into shared columns (totals to the grand-total width, buckets to the widest bucket) for readability. Ledger values and all internal arithmetic stay raw; grouping happens only at the print layer. Update the reader's bats oracles to match. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> --- .gaia/scripts/tests/token-rollup.bats | 64 +++++++++++++-------------- .gaia/scripts/token-rollup.sh | 38 +++++++++++++--- 2 files changed, 64 insertions(+), 38 deletions(-) diff --git a/.gaia/scripts/tests/token-rollup.bats b/.gaia/scripts/tests/token-rollup.bats index 25aff8e9..7bd5c1d9 100644 --- a/.gaia/scripts/tests/token-rollup.bats +++ b/.gaia/scripts/tests/token-rollup.bats @@ -99,25 +99,25 @@ setup() { @test "dedup-lower: later partial row with a lower total cannot lower the winner; missing partial key can win" { run bash "$SCRIPT" --spec-id SPEC-201 --ledger "$FIX/dedup-lower.jsonl" [ "$status" -eq 0 ] - [[ "$output" == *"execute: 1400000 (elapsed 11m40s)"* ]] - [[ "$output" == *"Total: 1400000 (elapsed 11m40s)"* ]] - [[ "$output" != *"900000"* ]] + [[ "$output" == *"execute: 1,400,000 (elapsed 11m40s)"* ]] + [[ "$output" == *"Total: 1,400,000 (elapsed 11m40s)"* ]] + [[ "$output" != *"900,000"* ]] } # ---------- 2. UAT-004 (inflate direction) ---------- @test "dedup-inflate: a partial row with the HIGHEST total cannot inflate the winner" { run bash "$SCRIPT" --spec-id SPEC-202 --ledger "$FIX/dedup-inflate.jsonl" [ "$status" -eq 0 ] - [[ "$output" == *"execute: 700000 (elapsed 6m40s)"* ]] - [[ "$output" == *"Total: 700000 (elapsed 6m40s)"* ]] - [[ "$output" != *"1200000"* ]] + [[ "$output" == *"execute: 700,000 (elapsed 6m40s)"* ]] + [[ "$output" == *"Total: 700,000 (elapsed 6m40s)"* ]] + [[ "$output" != *"1,200,000"* ]] } # ---------- 3. 
Tiebreak: equal totals, latest ended_at wins ---------- @test "dedup-tiebreak: equal-total non-partial rows break on latest ended_at" { run bash "$SCRIPT" --spec-id SPEC-203 --ledger "$FIX/dedup-tiebreak.jsonl" [ "$status" -eq 0 ] - [[ "$output" == *"execute: 500000 (elapsed 3m20s)"* ]] + [[ "$output" == *"execute: 500,000 (elapsed 3m20s)"* ]] [[ "$output" != *"1m40s"* ]] } @@ -125,7 +125,7 @@ setup() { @test "dedup-all-partial: a session with only partial rows falls back to max-total and flags partial" { run bash "$SCRIPT" --spec-id SPEC-204 --ledger "$FIX/dedup-all-partial.jsonl" [ "$status" -eq 0 ] - [[ "$output" == *"execute: 500000 (elapsed 4m10s)"* ]] + [[ "$output" == *"execute: 500,000 (elapsed 4m10s)"* ]] [[ "$output" == *"(partial: some ledger input was unreadable or lacked timing"* ]] } @@ -133,8 +133,8 @@ setup() { @test "cross-session: execute total sums deduped contributions from two sessions, including the halted one" { run bash "$SCRIPT" --spec-id SPEC-210 --ledger "$FIX/cross-session.jsonl" [ "$status" -eq 0 ] - [[ "$output" == *"execute: 4100000 (elapsed 45m0s)"* ]] - [[ "$output" == *"Total: 4100000 (elapsed 45m0s)"* ]] + [[ "$output" == *"execute: 4,100,000 (elapsed 45m0s)"* ]] + [[ "$output" == *"Total: 4,100,000 (elapsed 45m0s)"* ]] # regression trap: must NOT be the naive idle-gap-inclusive span (UAT-008) [[ "$output" != *"16500"* ]] [[ "$output" != *"4h35m"* ]] @@ -152,16 +152,16 @@ setup() { @test "full-cycle: renders spec/plan/execute/Total with correct grand buckets, unrelated feature excluded" { run bash "$SCRIPT" --spec-id SPEC-220 --ledger "$FIX/full-cycle.jsonl" [ "$status" -eq 0 ] - [[ "$output" == *"spec: 37000 (elapsed 10m0s)"* ]] - [[ "$output" == *"plan: 38300 (elapsed 11m40s)"* ]] - [[ "$output" == *"execute: 39600 (elapsed 13m20s)"* ]] - [[ "$output" == *"Total: 114900 (elapsed 35m0s)"* ]] - [[ "$output" == *"Fresh input: 3300"* ]] - [[ "$output" == *"Cache write: 6300"* ]] - [[ "$output" == *"Cache read: 93000"* ]] - [[ 
"$output" == *"Output: 12300"* ]] + [[ "$output" == *"spec: 37,000 (elapsed 10m0s)"* ]] + [[ "$output" == *"plan: 38,300 (elapsed 11m40s)"* ]] + [[ "$output" == *"execute: 39,600 (elapsed 13m20s)"* ]] + [[ "$output" == *"Total: 114,900 (elapsed 35m0s)"* ]] + [[ "$output" == *"Fresh input: 3,300"* ]] + [[ "$output" == *"Cache write: 6,300"* ]] + [[ "$output" == *"Cache read: 93,000"* ]] + [[ "$output" == *"Output: 12,300"* ]] # the unrelated SPEC-999 noise row must never leak into this feature's roll-up - [[ "$output" != *"39999996"* ]] + [[ "$output" != *"39,999,996"* ]] } # ---------- 8. UAT-007 spec-less plan omits the spec line ---------- @@ -169,18 +169,18 @@ setup() { run bash "$SCRIPT" --spec-id spec-less-slug-example --ledger "$FIX/spec-less.jsonl" [ "$status" -eq 0 ] [[ "$output" != *$'\n spec:'* ]] - [[ "$output" == *"plan: 370 (elapsed 1m0s)"* ]] - [[ "$output" == *"execute: 407 (elapsed 1m30s)"* ]] - [[ "$output" == *"Total: 777 (elapsed 2m30s)"* ]] + [[ "$output" == *"plan: 370 (elapsed 1m0s)"* ]] + [[ "$output" == *"execute: 407 (elapsed 1m30s)"* ]] + [[ "$output" == *"Total: 777 (elapsed 2m30s)"* ]] } # ---------- 9. UAT-010 corrupt line tolerated ---------- @test "corrupt: one unparseable line among good rows is skipped, not fatal; good rows still sum" { run bash "$SCRIPT" --spec-id SPEC-230 --ledger "$FIX/corrupt.jsonl" [ "$status" -eq 0 ] - [[ "$output" == *"spec: 5000 (elapsed 1m0s)"* ]] - [[ "$output" == *"plan: 6000 (elapsed 1m30s)"* ]] - [[ "$output" == *"Total: 11000 (elapsed 2m30s)"* ]] + [[ "$output" == *"spec: 5,000 (elapsed 1m0s)"* ]] + [[ "$output" == *"plan: 6,000 (elapsed 1m30s)"* ]] + [[ "$output" == *"Total: 11,000 (elapsed 2m30s)"* ]] [[ "$output" == *"(partial: some ledger input was unreadable or lacked timing"* ]] } @@ -221,7 +221,7 @@ setup() { [ "$status" -eq 0 ] # if the reader mis-resolved to the worktree (which has no ledger at all), # this would read "no ledger records found" instead of the real total. 
- [[ "$output" == *"execute: 1400000 (elapsed 11m40s)"* ]] + [[ "$output" == *"execute: 1,400,000 (elapsed 11m40s)"* ]] git -C "$MAIN" worktree remove --force "$WT" 2>/dev/null || rm -rf "$WT" } @@ -242,12 +242,12 @@ setup() { @test "unavailable-elapsed: real totals render but elapsed shows 'unavailable', never a fabricated 0s" { run bash "$SCRIPT" --spec-id SPEC-250 --ledger "$FIX/unavailable-elapsed.jsonl" [ "$status" -eq 0 ] - [[ "$output" == *"execute: 8000 (elapsed unavailable)"* ]] - [[ "$output" == *"Total: 8000 (elapsed unavailable)"* ]] + [[ "$output" == *"execute: 8,000 (elapsed unavailable)"* ]] + [[ "$output" == *"Total: 8,000 (elapsed unavailable)"* ]] [[ "$output" != *"elapsed 0s"* ]] - [[ "$output" == *"Fresh input: 1000"* ]] - [[ "$output" == *"Cache write: 2000"* ]] - [[ "$output" == *"Cache read: 4000"* ]] - [[ "$output" == *"Output: 1000"* ]] + [[ "$output" == *"Fresh input: 1,000"* ]] + [[ "$output" == *"Cache write: 2,000"* ]] + [[ "$output" == *"Cache read: 4,000"* ]] + [[ "$output" == *"Output: 1,000"* ]] [[ "$output" == *"(partial: some ledger input was unreadable or lacked timing"* ]] } diff --git a/.gaia/scripts/token-rollup.sh b/.gaia/scripts/token-rollup.sh index fe9f4078..d9dc3ffd 100755 --- a/.gaia/scripts/token-rollup.sh +++ b/.gaia/scripts/token-rollup.sh @@ -59,6 +59,19 @@ human_duration() { fi } +# Group a non-negative integer with thousands separators for DISPLAY ONLY; the +# stored ledger values and all internal arithmetic stay raw. A non-numeric +# input echoes back unchanged. 
+commify() { + local n="$1" out="" + is_uint "$n" || { printf '%s' "$n"; return 0; } + while (( ${#n} > 3 )); do + out=",${n: -3}${out}" + n="${n:0:${#n}-3}" + done + printf '%s%s' "$n" "$out" +} + # ---------- argument parsing (never crash on a bad/missing flag) ---------- FEATURE_KEY="" LEDGER_OVERRIDE="" @@ -215,6 +228,11 @@ is_uint "$out" || out=0 printf 'Cycle cost (%s):\n' "$FEATURE_KEY" +# Totals share one right-aligned column; the grand total is >= every action +# total, so its commified width is the column width for the action + Total lines. +grand_total_c="$(commify "$grand_total")" +tw=${#grand_total_c} + while IFS=$'\t' read -r a_action a_total a_elapsed a_avail; do is_uint "$a_total" || a_total=0 is_uint "$a_elapsed" || a_elapsed=0 @@ -223,7 +241,7 @@ while IFS=$'\t' read -r a_action a_total a_elapsed a_avail; do else a_elapsed_str="unavailable" fi - printf ' %-11s%8d (elapsed %s)\n' "$a_action:" "$a_total" "$a_elapsed_str" + printf ' %-11s%*s (elapsed %s)\n' "$a_action:" "$tw" "$(commify "$a_total")" "$a_elapsed_str" done < <(jq -r '.actions[] | [.action, .total, .elapsed, .elapsed_available] | @tsv' <<<"$summary") if [[ "$grand_elapsed_available" == "true" ]]; then @@ -231,11 +249,19 @@ if [[ "$grand_elapsed_available" == "true" ]]; then else total_elapsed_str="unavailable" fi -printf ' %-11s%8d (elapsed %s)\n' "Total:" "$grand_total" "$total_elapsed_str" -printf ' Fresh input: %s\n' "$fresh" -printf ' Cache write: %s\n' "$cwrite" -printf ' Cache read: %s\n' "$cread" -printf ' Output: %s\n' "$out" +printf ' %-11s%*s (elapsed %s)\n' "Total:" "$tw" "$grand_total_c" "$total_elapsed_str" + +# Buckets share their own right-aligned column, widened to the largest of the four. 
+fresh_c="$(commify "$fresh")"; cwrite_c="$(commify "$cwrite")" +cread_c="$(commify "$cread")"; out_c="$(commify "$out")" +bw=${#fresh_c} +(( ${#cwrite_c} > bw )) && bw=${#cwrite_c} +(( ${#cread_c} > bw )) && bw=${#cread_c} +(( ${#out_c} > bw )) && bw=${#out_c} +printf ' %-14s%*s\n' "Fresh input:" "$bw" "$fresh_c" +printf ' %-14s%*s\n' "Cache write:" "$bw" "$cwrite_c" +printf ' %-14s%*s\n' "Cache read:" "$bw" "$cread_c" +printf ' %-14s%*s\n' "Output:" "$bw" "$out_c" if (( corrupt == 1 )) || [[ "$grand_elapsed_partial" == "true" ]] || [[ "$grand_session_partial" == "true" ]]; then printf ' (partial: some ledger input was unreadable or lacked timing; figures are a lower bound)\n' From 216839696e7480bce94edb2f4607c13d5e296300 Mon Sep 17 00:00:00 2001 From: Steven Sacks <stevensacks@gmail.com> Date: Fri, 3 Jul 2026 22:30:58 +0900 Subject: [PATCH 4/7] docs(gaia): SPEC-017 CHANGELOG entry for execute cost accounting (#539) --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 81cba9c7..70ffdfda 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,7 @@ A release change that requires the adopter to act, run a command or hand-migrate ### Changed +- plan-execution token and wall-clock accounting now records execute cost from the orchestrator's own git commits and pushes through a `PreToolUse` hook instead of a manual, instruction-driven tally, so no phase is double-counted; it aggregates the whole execution across every session and dedups re-invocation rows on read. A new merge-time `PostToolUse` hook renders a full-cycle spec / plan / execute / total cost breakdown at `gh pr merge` (the spec line omitted for a spec-less plan), and a new `.gaia/scripts/token-rollup.sh` reader surfaces the same roll-up on demand (#539) - `/gaia-plan` now pins each plan-execution task sub-agent to Sonnet by default instead of letting it inherit the orchestrator session's model (typically Opus). 
The feature's complexity is resolved upstream in `/gaia-spec` and `/gaia-plan` and their audits, so execution runs faster and cheaper on the lighter model; the planner can still escalate a specific deep-synthesis phase to Opus, naming which and why in `ORCHESTRATOR.md` (#538) - `/update-deps` completes the flow on a `main`/`master` run: it now writes the update commit with a load-bearing `chore(deps)` subject (previously Phase 8 referenced "the update commit" but never wrote one), opens the PR, and merges it once the required checks are green (`--auto` under branch protection), then verifies the terminal `MERGED` state and cleans up the local checkout. A run on any other branch (or in CI) is unchanged: it pushes and leaves the PR to the branch owner. The `chore(deps)` subject clears the merge gate's dep-bump bypass, so the PR stays turnkey-mergeable without a code-review-audit marker (#534) - GAIA's Serena code-search routing guidance is now language-agnostic: the advisory `code-search` rule nudges toward Serena's LSP-backed symbol tools for symbol queries in any language Serena indexes for the project (not only TypeScript, and no longer scoped to `app/`/`test/`), so an adopter who configures another language server gets the same routing. The enforcement guard stays deliberately TypeScript-conservative and tsconfig-gated, since a wrong hard-block on a non-TS search is worse than a miss while the rule only nudges (#533) From ac4787599ed401a53f64c19dfa51327a16cb4370 Mon Sep 17 00:00:00 2001 From: Steven Sacks <stevensacks@gmail.com> Date: Fri, 3 Jul 2026 23:07:47 +0900 Subject: [PATCH 5/7] fix(gaia): guard token-rollup jq against valid-JSON non-object ledger lines (#539) A ledger line that is valid JSON but not an object (a bare scalar or array) survived the try/fromjson corrupt-line guard, then threw on .spec_id indexing and aborted the whole filter, silently dropping every good row. 
Coerce non-objects to the existing __BAD__ sentinel so they bump the bad count and fire the partial marker instead. Adds a bare-scalar/array fixture and a regression test. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> --- .../token-rollup/corrupt-nonobject.jsonl | 4 ++++ .gaia/scripts/tests/token-rollup.bats | 19 +++++++++++++++++++ .gaia/scripts/token-rollup.sh | 9 +++++++-- 3 files changed, 30 insertions(+), 2 deletions(-) create mode 100644 .gaia/scripts/tests/fixtures/token-rollup/corrupt-nonobject.jsonl diff --git a/.gaia/scripts/tests/fixtures/token-rollup/corrupt-nonobject.jsonl b/.gaia/scripts/tests/fixtures/token-rollup/corrupt-nonobject.jsonl new file mode 100644 index 00000000..c5992c73 --- /dev/null +++ b/.gaia/scripts/tests/fixtures/token-rollup/corrupt-nonobject.jsonl @@ -0,0 +1,4 @@ +{"action":"spec","spec_id":"SPEC-231","session_id":"sp1","buckets":{"fresh_input":300,"cache_write":600,"cache_read":1800,"output":300},"total":3000,"partial":false,"started_at":"2026-05-04T09:00:00.000Z","ended_at":"2026-05-04T09:01:00.000Z","duration_seconds":60,"duration_available":true,"ts":"2026-05-04T09:01:00Z"} +42 +{"action":"plan","spec_id":"SPEC-231","plan_slug":"spec-231-slug","session_id":"pl1","buckets":{"fresh_input":400,"cache_write":800,"cache_read":2400,"output":400},"total":4000,"partial":false,"started_at":"2026-05-04T09:05:00.000Z","ended_at":"2026-05-04T09:07:00.000Z","duration_seconds":120,"duration_available":true,"ts":"2026-05-04T09:07:00Z"} +[1, 2] diff --git a/.gaia/scripts/tests/token-rollup.bats b/.gaia/scripts/tests/token-rollup.bats index 7bd5c1d9..ff3abb59 100644 --- a/.gaia/scripts/tests/token-rollup.bats +++ b/.gaia/scripts/tests/token-rollup.bats @@ -77,6 +77,15 @@ bats_require_minimum_version 1.5.0 # skipped, not fatal; the good rows still roll up exactly; a corrupt # marker is appended; exit 0. 
# +# corrupt-nonobject.jsonl (SPEC-231: one spec row, a bare `42` line, one plan +# row, an array `[1, 2]` line) +# spec total=3000 dur=60 (1m0s); plan total=4000 dur=120 (2m0s) +# grand total = 7000; elapsed = 180s = 3m0s. The two non-object lines are +# valid JSON that `try fromjson` does not catch; without a type guard, +# indexing `.spec_id` on them throws and drops every good row. They are +# treated like an unparseable line: skipped, bad-count bumped, partial +# marker appended; exit 0. +# # unavailable-elapsed.jsonl (SPEC-250: one execute row, duration_available # false / duration_seconds null, but a REAL total) # total=8000, buckets 1000/2000/4000/1000 (sums to 8000). Both the @@ -184,6 +193,16 @@ setup() { [[ "$output" == *"(partial: some ledger input was unreadable or lacked timing"* ]] } +# ---------- 9b. Valid-JSON non-object line tolerated (guards the .spec_id throw) ---------- +@test "corrupt-nonobject: a valid-JSON non-object line (bare scalar / array) is skipped, not fatal; good rows still sum" { + run bash "$SCRIPT" --spec-id SPEC-231 --ledger "$FIX/corrupt-nonobject.jsonl" + [ "$status" -eq 0 ] + [[ "$output" == *"spec: 3,000 (elapsed 1m0s)"* ]] + [[ "$output" == *"plan: 4,000 (elapsed 2m0s)"* ]] + [[ "$output" == *"Total: 7,000 (elapsed 3m0s)"* ]] + [[ "$output" == *"(partial: some ledger input was unreadable or lacked timing"* ]] +} + # ---------- 10. Unknown feature key ---------- @test "unknown feature key: no records line, exit 0, no crash" { run bash "$SCRIPT" --spec-id NOPE-999 --ledger "$FIX/full-cycle.jsonl" diff --git a/.gaia/scripts/token-rollup.sh b/.gaia/scripts/token-rollup.sh index d9dc3ffd..ae428dcb 100755 --- a/.gaia/scripts/token-rollup.sh +++ b/.gaia/scripts/token-rollup.sh @@ -134,12 +134,17 @@ fi [[ -z "$LEDGER" || ! 
-f "$LEDGER" ]] && no_records # ---------- corrupt-line-tolerant parse + feature filter ---------- -# A line that fails to parse as JSON is skipped and bumps `bad`; it never -# aborts the read, and a single bad line never drops the good ones (UAT-010). +# A line that is not a well-formed JSON object -- unparseable, or valid JSON +# that is not an object (a bare scalar or array) -- is skipped and bumps `bad`; +# it never aborts the read, and a single bad line never drops the good ones +# (UAT-010). The non-object coercion matters because `try fromjson` only rescues +# parse failures: a bare `42` survives it, then indexing `.spec_id` on a number +# throws and aborts the whole filter, silently dropping every good row. corrupt=0 parsed="$(jq -R -s --arg fk "$FEATURE_KEY" ' split("\n") | map(select(length > 0)) | map(try fromjson catch "__BAD__") + | map(if type == "object" then . else "__BAD__" end) | { bad: (map(select(. == "__BAD__")) | length), recs: (map(select(. != "__BAD__")) | map(select(.spec_id == $fk))) From 7275b7e345e6cd16ccbd5b15e988d7cc005ed915 Mon Sep 17 00:00:00 2001 From: Steven Sacks <stevensacks@gmail.com> Date: Fri, 3 Jul 2026 23:44:31 +0900 Subject: [PATCH 6/7] docs(gaia): correct token-hook matcher comments re heredoc line-start edge (#539) The command matchers use a newline as a shell separator, so a heredoc body line that begins with the matched command does match. The comments overclaimed 'never inside a heredoc body'; correct them to note the benign, accepted edge (mid-line prose in a quoted string still does not match). 
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com> --- .claude/hooks/token-rollup-merge.sh | 6 ++++-- .claude/hooks/token-tally-git-op.sh | 10 ++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/.claude/hooks/token-rollup-merge.sh b/.claude/hooks/token-rollup-merge.sh index 01ad6f2b..6285fa2b 100755 --- a/.claude/hooks/token-rollup-merge.sh +++ b/.claude/hooks/token-rollup-merge.sh @@ -18,8 +18,10 @@ tool_name=$(jq -r '.tool_name // ""' <<<"$payload") cmd=$(jq -r '.tool_input.command // ""' <<<"$payload") # Match `gh pr merge` as a real shell invocation, at command start or right -# after a shell separator (&&, ;, ||, |, newline), never inside a heredoc -# body or a quoted string (e.g. a commit message mentioning it in prose). +# after a shell separator (&&, ;, ||, |, newline), not when mentioned mid-line +# in prose or a quoted string (e.g. a commit message). The newline separator +# does match a heredoc body line that begins with the command; that edge is +# benign (a spurious readout with no merge) and accepted. # Mirrors pr-merge-audit-check.sh's command match. start_re='^[[:space:]]*gh[[:space:]]+pr[[:space:]]+merge([[:space:]]|$)' sep_re=$'(\\&\\&|;|\\|\\||\\||\n)[[:space:]]*gh[[:space:]]+pr[[:space:]]+merge([[:space:]]|$)' diff --git a/.claude/hooks/token-tally-git-op.sh b/.claude/hooks/token-tally-git-op.sh index 17a492f1..3c9a9fe2 100755 --- a/.claude/hooks/token-tally-git-op.sh +++ b/.claude/hooks/token-tally-git-op.sh @@ -19,10 +19,12 @@ tool_name=$(jq -r '.tool_name // ""' <<<"$payload") cmd=$(jq -r '.tool_input.command // ""' <<<"$payload") # Match `git commit` or `git push` as a real shell invocation, at command -# start or right after a shell separator (&&, ;, ||, |, newline), never -# inside a heredoc body or a quoted string (e.g. a commit message that -# mentions the command in prose). Bash `=~` gives whole-string semantics; -# `grep` is line-oriented and would match heredoc body lines too. 
+# start or right after a shell separator (&&, ;, ||, |, newline), not when +# mentioned mid-line in prose or a quoted string (e.g. a commit message). +# Bash `=~` gives whole-string semantics; `grep` is line-oriented and would +# match every heredoc body line. The newline separator here still matches a +# heredoc body line that begins with the command; that edge is benign (one +# extra tally row the per-session dedup collapses) and accepted. start_re='^[[:space:]]*git[[:space:]]+(commit|push)([[:space:]]|$)' sep_re=$'(\\&\\&|;|\\|\\||\\||\n)[[:space:]]*git[[:space:]]+(commit|push)([[:space:]]|$)' if [[ "$cmd" =~ $start_re ]]; then From 660487ae06a03dd52531720fe7b1ef06707bb22d Mon Sep 17 00:00:00 2001 From: Steven Sacks <stevensacks@gmail.com> Date: Fri, 3 Jul 2026 23:51:06 +0900 Subject: [PATCH 7/7] chore: code review audit passed GAIA-Audit: 1.6.1 08733aec711cd5dc94cc611f1f20a0f20284b16b