diff --git a/.github/workflows/release-production.yml b/.github/workflows/release-production.yml index 7e444875aa..c50157c2ba 100644 --- a/.github/workflows/release-production.yml +++ b/.github/workflows/release-production.yml @@ -372,6 +372,11 @@ jobs: with: ref: ${{ needs.prepare-build.outputs.build_ref }} fetch-depth: 1 + # Targeted init (not `submodules: true`) so we skip the large tauri-cef + # fork the core image doesn't need. The Dockerfile COPYs vendor/ because + # [patch.crates-io] resolves tinyagents from vendor/tinyagents. + - name: Init tinyagents submodule + run: git submodule update --init vendor/tinyagents - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - name: Log in to GHCR diff --git a/.github/workflows/release-staging.yml b/.github/workflows/release-staging.yml index 22b26dd93e..8fb162fa6a 100644 --- a/.github/workflows/release-staging.yml +++ b/.github/workflows/release-staging.yml @@ -265,6 +265,11 @@ jobs: with: ref: ${{ needs.prepare-build.outputs.build_ref }} fetch-depth: 1 + # Targeted init (not `submodules: true`) so we skip the large tauri-cef + # fork the core image doesn't need. The Dockerfile COPYs vendor/ because + # [patch.crates-io] resolves tinyagents from vendor/tinyagents. + - name: Init tinyagents submodule + run: git submodule update --init vendor/tinyagents - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 - name: Build image (no push) diff --git a/.gitignore b/.gitignore index dc7c864982..1f9f80e7a8 100644 --- a/.gitignore +++ b/.gitignore @@ -126,3 +126,4 @@ distribution.cer # Release note previews CHANGELOG.preview.md *.profraw +*.diff \ No newline at end of file diff --git a/.gitmodules b/.gitmodules index 72a5f05a08..8f61e06ed6 100644 --- a/.gitmodules +++ b/.gitmodules @@ -5,3 +5,6 @@ [submodule "app/src-tauri/vendor/tauri-plugin-notification"] path = app/src-tauri/vendor/tauri-plugin-notification url = https://github.com/tinyhumansai/tauri-plugin-notification.git +[submodule "vendor/tinyagents"] + path = vendor/tinyagents + url = https://github.com/tinyhumansai/tinyagents diff --git a/Cargo.lock b/Cargo.lock index 2bfc7fa610..6aec97037a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6676,9 +6676,7 @@ dependencies = [ [[package]] name = "tinyagents" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3ebc9790a96fe910c98c60758fad23bd33faeb77889bc2355e6dc3f5aa70e0c" +version = "1.5.0" dependencies = [ "async-trait", "futures", diff --git a/Cargo.toml b/Cargo.toml index c1545fdca2..b3dfbe01c5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,7 +42,7 @@ crate-type = ["rlib"] tinyplace = "1.0.1" # tinyflows — host-agnostic workflow engine (typed node graph → validate → compile → # run on tinyagents). Powers the "Workflows" feature via the seam in -# `src/openhuman/tinyflows/` + the `flows::` domain. Pulls tinyagents 1.3 transitively +# `src/openhuman/tinyflows/` + the `flows::` domain. Pulls tinyagents 1.5 transitively # (same version openhuman already uses — no conflict). Published on crates.io. tinyflows = "0.3" # TinyAgents — Rust LLM orchestration framework (LangGraph/LangChain-style): @@ -56,7 +56,7 @@ tinyflows = "0.3" # aligned to 0.40, avoiding duplicate `links = "sqlite3"` native bindings. # Durable graph checkpoints still use `SqlRunLedgerCheckpointer` until the # migration re-points those rows to the crate checkpointer. -tinyagents = { version = "1.3", features = ["sqlite"] } +tinyagents = { version = "1.5.0", features = ["sqlite"] } # TokenJuice code compressor — AST-aware signature extraction. Optional (C build) # behind the default `tokenjuice-treesitter` feature; disabling it falls back to # the language-agnostic brace-depth heuristic. See src/openhuman/tokenjuice/compressors/code.rs. @@ -230,7 +230,7 @@ fantoccini = { version = "0.22.0", optional = true, default-features = false, fe serde-big-array = { version = "0.5", optional = true } pdf-extract = "0.10" # WhatsApp Web — upstream `whatsapp-rust` 0.5. Its Diesel-backed sqlite-storage -# feature links sqlite3 separately from rusqlite 0.40, so the TinyAgents 1.3 +# feature links sqlite3 separately from rusqlite 0.40, so the TinyAgents 1.5 # baseline compiles this provider against wacore's in-memory Backend until a # rusqlite-backed durable store lands. whatsapp-rust = { version = "0.5", optional = true, default-features = false, features = ["tokio-runtime"] } @@ -331,6 +331,12 @@ unexpected_cfgs = { level = "warn", check-cfg = ['cfg(coverage)'] } # See: https://github.com/tinyhumansai/openhuman/issues/273 [patch.crates-io] whisper-rs-sys = { git = "https://github.com/tinyhumansai/whisper-rs-sys.git", branch = "main" } +# TinyAgents is vendored as a git submodule (pinned at the released tag) so +# migration work can change the SDK source in-tree, test it against OpenHuman +# immediately, and PR the diff upstream from the submodule. Keep the submodule +# version in lockstep with the `tinyagents` requirement above. After cloning: +# `git submodule update --init vendor/tinyagents` (worktrees included). +tinyagents = { path = "vendor/tinyagents" } # Emit just enough DWARF in release builds for Sentry to symbolicate Rust # panics + render surrounding source lines. `line-tables-only` keeps the diff --git a/Dockerfile b/Dockerfile index 00be4887c7..477f8e6032 100644 --- a/Dockerfile +++ b/Dockerfile @@ -45,6 +45,11 @@ WORKDIR /build # Cache dependencies — copy only manifests first COPY Cargo.toml Cargo.lock rust-toolchain.toml ./ +# Vendored TinyAgents SDK (git submodule; [patch.crates-io] points here, so +# the dep-cache build below already resolves it). CI must init the submodule +# before docker build — see the "Init tinyagents submodule" steps in +# release-production.yml / release-staging.yml. +COPY vendor/ vendor/ # Create a dummy src to build deps RUN mkdir -p src && \ echo 'fn main() {}' > src/main.rs && \ diff --git a/app/src-tauri/Cargo.lock b/app/src-tauri/Cargo.lock index 83a08db474..12d74d0459 100644 --- a/app/src-tauri/Cargo.lock +++ b/app/src-tauri/Cargo.lock @@ -9077,9 +9077,7 @@ dependencies = [ [[package]] name = "tinyagents" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3ebc9790a96fe910c98c60758fad23bd33faeb77889bc2355e6dc3f5aa70e0c" +version = "1.5.0" dependencies = [ "async-trait", "futures", diff --git a/app/src-tauri/Cargo.toml b/app/src-tauri/Cargo.toml index 8d0d9af8e3..4b7f9fe477 100644 --- a/app/src-tauri/Cargo.toml +++ b/app/src-tauri/Cargo.toml @@ -207,6 +207,10 @@ e2e-test-support = ["openhuman_core/e2e-test-support"] # whisper.cpp to use MSVC's static runtime (/MT), matching CEF and avoiding # LNK2038/LNK1169 CRT conflicts on Windows. whisper-rs-sys = { git = "https://github.com/tinyhumansai/whisper-rs-sys.git", branch = "main" } +# TinyAgents vendored submodule (repo-root vendor/tinyagents, pinned at the +# released tag) — same patch as the root Cargo world so both resolve the +# in-tree SDK source. `git submodule update --init vendor/tinyagents` first. +tinyagents = { path = "../../vendor/tinyagents" } # CEF support lives on the `feat/cef` branch of tauri-apps/tauri. We carry our # own fork at tinyhumansai/tauri-cef on `feat/cef-notification-intercept` which diff --git a/docs/tinyagents-full-migration-plan/00-baseline.md b/docs/tinyagents-full-migration-plan/00-baseline.md index 10d7fff0a8..b41bcb0491 100644 --- a/docs/tinyagents-full-migration-plan/00-baseline.md +++ b/docs/tinyagents-full-migration-plan/00-baseline.md @@ -1,21 +1,21 @@ # 00 — Baseline: crate, features, native links -Current status (2026-07-02): baseline dependency alignment is complete in both -Cargo worlds. `tinyagents 1.3.0` is resolved with the `sqlite` feature, +Current status (2026-07-03): baseline dependency alignment is complete in both +Cargo worlds. `tinyagents 1.5.0` is resolved with the `sqlite` feature, OpenHuman pins `rusqlite = "=0.40.0"`, both worlds patch through `vendor/rusqlite-0.40.0` and `vendor/libsqlite3-sys-0.38.0`, and the SDK-gaps inventory has been refreshed against the published 1.3.0 crate source. ## Steps -1. **Bump `tinyagents` to `"1.3"`** (done in both Cargo worlds — root and +1. **Bump `tinyagents` to `"1.5.0"`** (done in both Cargo worlds — root and `app/src-tauri/`). Known 1.1→1.2 break already handled (`MessageDelta::text` ctor). Note: the `openai` crate feature was removed after 1.2.0 (1.2.1+ features are only `sqlite`/`repl`) — we never enabled it, so no impact. See "1.3.0 delta" below for new API this plan uses. 2. **Align rusqlite to 0.40** in both worlds (`Cargo.toml` root and `app/src-tauri/Cargo.toml`). OpenHuman pins `rusqlite = "=0.40.0"` and - enables `tinyagents = { version = "1.3", features = ["sqlite"] }`. + enables `tinyagents = { version = "1.5.0", features = ["sqlite"] }`. Compatibility notes: - `rusqlite 0.40` and `libsqlite3-sys 0.38` are consumed directly from crates.io. Their build scripts use the `cfg_select!` macro (stable from diff --git a/docs/tinyagents-full-migration-plan/01-tooling/README.md b/docs/tinyagents-full-migration-plan/01-tooling/README.md index 6202508e38..e120098b58 100644 --- a/docs/tinyagents-full-migration-plan/01-tooling/README.md +++ b/docs/tinyagents-full-migration-plan/01-tooling/README.md @@ -5,7 +5,7 @@ exposure, and output budgeting onto SDK primitives; delete the OpenHuman side-lookup pattern and legacy tool plumbing. Target SDK surface (available across tinyagents 1.2.x–1.3.0; current repo lock -is 1.3.0): +is 1.5.0): - `Tool::policy() -> ToolPolicy { side_effects, runtime, access }` — serializable safety metadata (read_only/writes_files/network/destructive/ diff --git a/docs/tinyagents-full-migration-plan/04-sessions/03-checkpointer.md b/docs/tinyagents-full-migration-plan/04-sessions/03-checkpointer.md index 20cd7f9bfc..cb0800418c 100644 --- a/docs/tinyagents-full-migration-plan/04-sessions/03-checkpointer.md +++ b/docs/tinyagents-full-migration-plan/04-sessions/03-checkpointer.md @@ -1,6 +1,6 @@ # 04.3 — SqliteCheckpointer swap -Baseline is complete: OpenHuman is on tinyagents 1.3.0 with the `sqlite` +Baseline is complete: OpenHuman is on tinyagents 1.5.0 with the `sqlite` feature and the compatible `rusqlite` pin. The remaining work is the OpenHuman-row migration/expiry decision. diff --git a/docs/tinyagents-full-migration-plan/10-registry.md b/docs/tinyagents-full-migration-plan/10-registry.md index 32d04c3000..e49c830716 100644 --- a/docs/tinyagents-full-migration-plan/10-registry.md +++ b/docs/tinyagents-full-migration-plan/10-registry.md @@ -37,7 +37,7 @@ aliases }`, `RegistrySnapshot` (`to_dot()`), native/MCP/Composio/generated tools, unsafe aliases → registry diagnostic errors (today: duplicate handling is scattered across generated tools, MCP, and native registration instead of one SDK diagnostic stream). - TinyAgents 1.3.0 is pinned and exposes `AliasBinding`, alias diagnostics, + TinyAgents 1.5.0 is pinned and exposes `AliasBinding`, alias diagnostics, cross-kind name-reuse detection, and `ComponentKind::{Middleware, Checkpointer, TaskStore, Listener}`; OpenHuman still needs to project those SDK diagnostics into its runtime. diff --git a/docs/tinyagents-full-migration-plan/C2b-todos-parity.md b/docs/tinyagents-full-migration-plan/C2b-todos-parity.md new file mode 100644 index 0000000000..fa5010881e --- /dev/null +++ b/docs/tinyagents-full-migration-plan/C2b-todos-parity.md @@ -0,0 +1,103 @@ +# C2b — Task board / todos onto `graph::todos` (parity note) + +Status: **first slice landed** (branch `feat/tinyagents-c2-todos`). Adapter-first, +shadow-only. Legacy stays authoritative; nothing here changes product behavior. + +This note pairs with the CONTINUATION plan §C2 (step 3) and records what the +crate `tinyagents::graph::todos` surface maps onto in OpenHuman, and what it does +**not** — the residue that must stay in the product host after a future cutover. + +## What landed in this slice + +- `src/openhuman/todos/graph_shadow.rs` — the adapter: + - Total, lossless status mapping OpenHuman ↔ crate (`map_status_to_crate` / + `map_status_from_crate`) — the two `TaskCardStatus` enums share the same + seven variants (`Todo`, `AwaitingApproval`, `Ready`, `InProgress`, + `Blocked`, `Done`, `Rejected`). + - `TaskBoardCard` field-by-field conversion (`to_crate_card`) — all metadata + (objective, plan, assignedAgent, allowedTools, approvalMode, + acceptanceCriteria, evidence, notes, blocker, sessionThreadId, + sourceMetadata, order, updatedAt) preserved. + - `spawn_mirror` — after every authoritative `todos::ops::save_cards` write of + a `Thread` board, mirrors the persisted cards into a crate `FileStore` + (`/tinyagents_graph_store`) under namespace `graph.todos`. Fire- + and-forget, log-only; a crate rejection (e.g. the single-`InProgress` + invariant) is warn-logged as a **DIVERGENCE**. + - `spawn_shadow_claim` — wired into `todos::ops::claim_card` (which is the + single claim entry-point the dispatcher's two claim sites in + `task_dispatcher/dispatch.rs` funnel through, plus RPC/reclaim callers). It + seeds the crate board with the pre-claim snapshot and replays the crate + `claim_card` CAS, warn-logging when the crate ok/err verdict disagrees with + the authoritative legacy claim. + +The legacy claim/save path is byte-for-byte preserved: `claim_card` was +refactored to compute one ok/err verdict via an extracted `apply_claim` helper +(so the shadow sees the same not-found / wrong-status / invariant outcomes), but +the persisted result and all existing tests are unchanged. + +## Crate `TodoTool` vs `agent/tools/todo.rs` — what maps + +The crate ships a single multiplexer `TodoTool` (`op`-dispatched) that is a near +drop-in for the OpenHuman `todo` tool. Shared surface: + +| Concern | Crate `TodoTool` | OpenHuman `tools/todo.rs` | Match? | +| --- | --- | --- | --- | +| Dispatch style | single tool, `op` field | single tool, `op` field | yes | +| Thread binding | `ToolExecutionContext::thread_id` (never an arg) | `thread_context::current_thread_id()` + `fork_context` parent | yes (both bind to current thread, never an arg) | +| `add`/`edit`/`update_status`/`remove`/`replace`/`clear`/`list` | present | present | yes | +| Optional card fields | objective/plan/assignedAgent/allowedTools/approvalMode/acceptanceCriteria/evidence/notes/blocker | same | yes | +| Return shape | `{ threadId, cards, markdown }` | `{ threadId, cards, markdown }` | yes | +| Status aliases | `parse_status` (pending→todo, approved→ready, …) | `ops::parse_status` (identical alias table) | yes | +| Single-`InProgress` invariant | `enforce_single_in_progress` (hard error) | `enforce_single_in_progress` (identical) | yes | +| `claim_card` CAS | `store::claim_card(expected, target)` | `ops::claim_card(expected, target)` | yes (identical semantics; proven by shadow tests) | + +## What does NOT map (product residue — must stay in the host) + +1. **Approval-gate coupling.** OpenHuman's `todo` tool stamps a default + `approvalMode` by reading `config.autonomy.require_task_plan_approval` + (`default_task_approval_mode`), and the dispatcher's + `requires_plan_approval` + `TaskPlanAwaitingApproval` `DomainEvent` drive the + interactive plan-review gate. The crate `TodoTool` has **no** config read and + **no** approval-gate wiring — it exposes `decide_plan`/`revise_plan` state + transitions only. The gate policy stays product. +2. **`DomainEvent` emissions.** `ops::claim_card`/mutations emit + `AgentProgress::TaskBoardUpdated` (via `fork_context` `on_progress`) and the + dispatcher publishes `TaskPlanAwaitingApproval`. The crate store emits + nothing. All event vocabulary stays product (ledger: keep). +3. **RPC projection shapes.** `threads.task_board_*` and `openhuman.todos_*` + (see `todos/schemas.rs`) are the wire contracts the kanban UI binds to + (`app/src/services/api/todosApi.ts`, `USER_TASKS_THREAD_ID = "user-tasks"`). + These are **unchanged** by this slice and, per §C2, become read-side + projections over the crate store only at cutover — not now. +4. **Scratch board.** OpenHuman has a thread-less in-memory `BoardLocation::Scratch` + fallback (tool calls outside a chat thread). The crate board is always + `(Store, thread_id)`, so scratch mutations have **no** crate mirror target — + the shadow skips them (trace-logged). +5. **Persistence substrate + timestamps.** Product persists RFC3339 + `updated_at` to `/agent_task_boards/.json`; the + crate uses epoch-millis strings in a `Store` namespace. The mirror does not + reconcile timestamps (cosmetic). Card-id minting also differs + (`task-` product vs `task-` crate) but ids are passed through, so a + persisted board round-trips. +6. **Run lifecycle.** `todos/runs.rs` (run records, heartbeats, stale-reclaim) + and `task_dispatcher/` executor mechanics (executor resolution, autonomous + run, board write-back) are **not** part of the crate todos surface — they + stay product and are the §C2 step-3 "runner node" work, tracked separately. + +## Single-writer constraint + +The crate `Store` has no compare-and-set, so ns `graph.todos` assumes a single +writer. The core process is that single writer (both the mirror and the +shadow-claim run in-core). Documented in the module header; honoured because all +mutations funnel through `todos::ops`. + +## Next (not in this slice) + +- Flip the mirror from shadow to authoritative (crate store becomes the source + of truth; legacy JSON becomes a projection or is retired). +- Reimplement `threads.task_board_*` / `openhuman.todos_*` as projections over + the crate store. +- Replace the dispatcher claim/poll loop with the crate `claim_card` CAS + a + graph runner node, keeping `DomainEvent` emission + channel bindings product. +- Delete `task_board.rs` + todo CRUD mechanics + dispatcher executor mechanics + (~3.2k + tests) once parity logs are clean. diff --git a/docs/tinyagents-full-migration-plan/CONTINUATION-2026-07.md b/docs/tinyagents-full-migration-plan/CONTINUATION-2026-07.md new file mode 100644 index 0000000000..cada56cfee --- /dev/null +++ b/docs/tinyagents-full-migration-plan/CONTINUATION-2026-07.md @@ -0,0 +1,265 @@ +# TinyAgents Migration — Continuation Plan (2026-07-03) + +Status: supersedes the ordering in `README.md` for remaining work. Written +after a ground-truth audit of `main` (post-#4249), a re-inventory of the +TinyAgents crate (1.4.0 published, 1.5.0 tagged), and a critical +re-evaluation of the `99-deletion-ledger.md` "Never delete" list. + +## 1. Where we actually are (ground truth, main @ 2026-07-03) + +The "finish TinyAgents harness migration" PR (#4249 → #4399) has landed. +Per-workstream state: + +| Workstream | State | +| ---------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| 00 baseline | Done (1.3.0 + sqlite, rusqlite 0.40) — **needs re-bump to 1.4/1.5** | +| 01 tooling | Mostly done. Live: crate-internal tool side-lookup (01.1), `tool_filter.rs` 299 + `tool_prep.rs` 344 (01.3) | +| 02 models | Mostly done. Live: `reliable.rs` 900 (gated on re-architecture), `ThinkingForwarder` residual seams | +| 03 context/cache | Done. `context/` is 1.3k of product prompt/stats state | +| 04 sessions | **Primary unfinished work.** Live path is 100% legacy: `transcript.rs` 1347, `migration.rs` 373, `session_io.rs` 463, `session_db/` 4.5k, `subagent_sessions/` 653, `session_import/` 1.9k. Crate `Store`/`AppendStore` used only by the write-only importer. 04.3 checkpointer swap IS done in code (`tinyagents/checkpoint.rs` deleted) — the 04.3 doc text is stale | +| 05 events | 05.2 engine deletion done. `progress_tracing` (817 + 477 langfuse + 719 tests) still the live web-progress exporter; journal projection not at parity | +| 06 cost | Not wired: no `BudgetMiddleware`/`BudgetLimits`/`RunTree` in the shared runner. Blocker: `UsageRecorded` de-duplication | +| 07 subagents | Diagnostic-skeleton graph only; procedural runner still live. `running_subagents.rs` has **grown to 1931 lines** (target ≤300); `JsonlTaskStore` adopted for detached lifecycle records; `run_queue/` + `spawn_depth_context.rs` still live | +| 08 orchestration | 08.1/08.2/08.4 done. Live: 08.3 durable interrupts for approvals, 08.5 `worktree_context.rs` fallback thread | +| 09 embeddings | Done | +| 10 registry | Pending — `CapabilityRegistry` unused in src/ | +| 11 testing | Conformance pass green on 2026-07-02 baseline | + +Crate APIs available but **unused** in src/: `JsonlTaskStore` (now partially +adopted), `BudgetMiddleware`, `ContextualToolSelectionMiddleware` (only its +shadow), `UnknownToolPolicy`, `CapabilityRegistry`. + +## 2. Crate delta: 1.4.0 / 1.5.0 (the upgrade this plan targets) + +- **1.4.0 (published 2026-07-02)** + - `graph::goals` — durable per-thread `ThreadGoal`: completion contract, + token budget, Active/Paused/BudgetLimited/Complete, `goal_gate_node` + self-driving loop, `run_continuation_tick`, `note_user_turn`, model tools + `goal_get/goal_set/goal_complete`, host `goal_pause/resume/clear`. + Persists on harness `Store` ns `graph.goals`. + - `graph::todos` — `TaskBoard` kanban (Todo→Ready→InProgress→Done, Blocked, + AwaitingApproval), single-`InProgress` invariant, `claim_card` CAS, + single multiplexer `TodoTool`. Persists ns `graph.todos`. + - Graph resilience: `CompiledGraph::with_node_retry(RetryPolicy)`, + opt-in backoff sleeping, failure-boundary checkpoints + + `CompiledGraph::retry(thread)` resumable failures, + `GraphEvent::NodeRetryScheduled`. +- **1.5.0 (tagged 2026-07-03, not yet on crates.io)** + - `harness::no_progress::NoProgressTracker` — extracted from OpenHuman + PR #4389. Pure state machine: `record(step, &ToolAttempt) -> +Continue/Nudge/Halt`; identical-failure ladder, varied-failure backstop, + fast-trip on hard policy rejects. +- Known crate limitation relevant here: `Store` has no compare-and-set, so + goals/todos mutations must funnel through one process (fine — the core is + the single writer). + +## 3. Re-evaluated component verdicts (revises `99-deletion-ledger.md`) + +The old "Never delete" list over-protects. Revised verdicts, ordered by +reclaimable lines (non-test; tests roughly double each figure): + +### Migrate to crate, then delete locally (upstream-extraction candidates) + +Precedent: `NoProgressTracker` was extracted upstream from #4389 and shipped +in 1.5.0. Same play for: + +| Component | Lines (~generic) | Notes | +| ------------------------------------------------------------- | ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `pformat.rs` + `dispatcher.rs` + `harness/parse.rs` | 1941 (~1900) | Tool-call dialect machinery (P-format encoder, permissive XML/JSON parser with key-drift recovery). Zero DomainEvent coupling. Delete gate: 04.2 read-cutover (no live path parses provider text) | +| `multimodal.rs` | 1690 (~1550) | `[IMAGE:]`/`[FILE:]` marker → provider content blocks, mime allowlist, PDF extraction, fetch gating, truncation budget. Only the marker convention is product | +| `progress_tracing.rs` + `langfuse.rs` | 1294 (~1200) | Crate already ships Langfuse exporters + journals. Delete gate: 05.3 journal-backed web-progress parity | +| `tool_result_artifacts/` | 588 (~500) | Already built on crate `Store`; overflow-to-artifact is a generic harness concern. Product residue: PII scrub hook | +| `hooks.rs` + `stop_hooks.rs` trait machinery | 543 (~450) | PostTurnHook/StopHook traits are pure harness; product hook bodies stay | +| `host_runtime.rs` adapter core | 456 (~350) | Native/Docker `RuntimeAdapter` overlaps crate workspace isolation | +| `ArgRecoveryMiddleware`, `RepeatedToolFailureMiddleware` core | ~300 | Latter becomes a thin driver over crate `NoProgressTracker` (1.5.0) | +| `tool_filter.rs` fuzzy ranker | 299 (~250) | Generic ranking; Composio input types stay product | +| `triage/` evaluator+routing+decision core | ~800 of 3779 | Generic "LLM triage node" (tiered fallback, cache, verdict parse). `envelope.rs`/`escalation.rs`/`events.rs` stay product (Composio, DomainEvents, agent ids) | + +### Replace with crate features, then delete (no upstreaming needed) + +| Component | Lines | Replaced by | +| ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `thread_goals/` | 2185 | `graph::goals` (1.4.0). OpenHuman keeps RPC schemas + UI projection over the crate store | +| `agent/task_board.rs` + `tools/todo.rs` mechanics | 604 + ~430 | `graph::todos` `TaskBoard` + `TodoTool` (1.4.0). RPC shapes (`threads.task_board_*`, `openhuman.todos_*`) become projections | +| `task_dispatcher/` executor mechanics | ~450 of 1494 | `claim_card` CAS + graph runner node; DomainEvent emission + channel bindings stay | +| 6 duplicate middlewares in `tinyagents/middleware.rs` (2448 total): `OpenHumanToolExposureShadow`, `CacheAlign`, `Microcompact`, `CostBudget`, `PromptCacheSegment` (partial), `ToolPolicyMiddleware` (local) | ~900 | Crate `ContextualToolSelectionMiddleware`, cache guard, compaction, `BudgetMiddleware`, `ToolAllowlistMiddleware`. All are self-acknowledged shadows/parity holds | +| `spawn_depth_context.rs` | 92 | Crate `RecursionPolicy`/`RecursionStack` | +| legacy session stack (04.2 phase 4) | ~9000 incl. tests | Crate `Store`/`AppendStore`/`StoreChatHistory` | + +### Confirmed product — keep (ledger was right) + +`archivist/` (memory/learning policy — only the hook-scheduling shell ~200 +lines is generic), `bus.rs`, `schemas.rs`, `error.rs`, `turn_origin.rs`, +`memory_context.rs`, `debug/`, `library/`, `progress.rs` event vocabulary, +prompts **content** (the section/render machinery ~600 lines is generic but +decoupling is low-value), preference/`run_workflow`/`delegate_to_personality` +tools, approval/`MemoryProtocol`/`CliRpcOnly`/`ToolOutcomeCapture` +middlewares. + +Cross-cutting gate for the coupled middle tier (session turn/builder, triage +escalation, task dispatch): **DomainEvent subscribers and JSON-RPC shapes** — +these must become projections before their hosts can shrink. + +### Coverage sweep: harness files previously unreferenced by any plan doc (2026-07-03) + +A basename grep of `agent/harness/**` against this plan folder found these +non-test files with no verdict anywhere. Assigned now (tests follow parents): + +| File | Lines | Verdict | +| --- | --- | --- | +| `subagent_runner/extract_tool.rs` | 612 | **[migrate/delete]** — generic progressive-disclosure Q&A over handoff-cached payloads; pairs with `handoff.rs`. Goes with the C5 overflow-to-artifact/handoff extraction | +| `subagent_runner/handoff.rs` | 287 | **[migrate/delete]** — generic oversized-result handoff cache (crate tool-output/artifact overlap). C5 | +| `subagent_runner/types.rs` | 232 | **[shrink]** — spawn options/outcome/error taxonomy largely maps to crate `SubAgentPolicy`/`OrchestrationTaskKind`. C6 | +| `subagent_runner/autonomous.rs` | 29 | **[keep]** — approval-gating override policy for unattended skill runs | +| `fork_context.rs` | 226 | **[delete-via-crate]** — task-local parent-context carrier working around the `Tool` trait; replace with `ToolExecutionContext`/`RunContext` fields (same play that deleted `worktree_context.rs`). C6 | +| `sandbox_context.rs` | 94 | **[delete-via-crate]** — same task-local pattern (sandbox_mode carrier). C6 | +| `task_recency_context.rs` | 106 | **[delete-via-crate]** — same pattern (Composio recency window). C6 | +| `turn_attachments_context.rs` | 71 | **[delete-via-crate]** — same pattern (vision-attachment forwarding). C6 | +| `session/turn_checkpoint.rs` | 105 | **[delete]** with the C1 session cutover (turn checkpointing rides crate checkpoints) | +| `memory_protocol.rs` | 386 | **[keep]** — product memory-protocol state machine (#4116) behind `MemoryProtocolMiddleware` | +| `builtin_definitions.rs` | 273 | **[keep]** — product agent-definition data facade | +| `definition_loader.rs` | 295 | **[keep]** — product TOML definition loader | +| `archivist/{hook_impl,recap,tree_ingest}.rs` | 592 | **[keep]** — archivist internals, verdict inherited from §3 | + +The four task-local context carriers (`fork_context`, `sandbox_context`, +`task_recency_context`, `turn_attachments_context`, ~500 lines) are one +C6 work item: extend the crate execution context instead of task-locals. + +## 4. Continuation workstreams (execution order) + +Sized for `/goal` execution like the original folders; each step lands code + +tests + deletions + a ledger tick. + +### C0 — Crate bump to 1.4 (then 1.5 when published) + +1. Bump both Cargo worlds to `tinyagents = "1.4"`; re-verify sqlite chain. +2. When 1.5.0 publishes: bump again; rewrite `RepeatedToolFailureMiddleware` + as a driver over `harness::no_progress::NoProgressTracker`; delete the + in-house identical-failure ladder. +3. Adopt `with_node_retry` + `CompiledGraph::retry` on the delegation and + spawn-parallel graphs (replaces bespoke retry glue; complements 08.3). +4. Update `00-baseline.md` "1.3.0 delta" → 1.4/1.5 delta; refresh + `docs/tinyagents-sdk-gaps.md` (goals/todos and no-progress close two + OpenHuman-convergence items). + +### C1 — Sessions cutover (04.2 phases 2–4) — biggest single unlock + +The importer (`session_import/`) already proves shape parity. Execute: + +1. 04.1 live dual-writes (turns append `session.{stem}.messages` + descriptor + upsert alongside legacy JSONL). +2. Shadow reads + parity fixtures (11-fixture matrix), then flip reads. +3. Retire: `transcript.rs` (1347+978), `migration.rs` (373+170), + `session_io.rs` (463), `session_db/` generic parts (~1.6k), + `subagent_sessions/` (653); `session_import/` one release later. +4. Then (unblocked by "no live path parses provider text"): delete + `dispatcher.rs` + `parse.rs` + `pformat.rs` (~1.9k + tests), after + upstreaming the dialect machinery (C5) or accepting native-only. + ~11k lines total. +5. Fix the stale 04.3 doc (checkpointer swap already landed; `checkpoint.rs` + is gone). + +### C2 — Thread goals + thread tasks onto `graph::goals`/`graph::todos` + +1. Adapter: `thread_goals/store.rs`+`runtime.rs`+`continuation.rs` → + crate `ThreadGoal` + `goal_gate_node` + `run_continuation_tick`; keep + `thread_goals/schemas.rs` RPC shapes as projections; map + `goal_get/set/complete` model tools + host pause/resume/clear. +2. One-time migration of existing goal rows into ns `graph.goals`. +3. `task_board.rs` → crate `TaskBoard`; `tools/todo.rs` → crate `TodoTool`; + `task_dispatcher/` claim/poll loop → `claim_card` CAS + runner node. + DomainEvent emissions + `threads.task_board_*`/`openhuman.todos_*` RPC + become read-side projections of the crate store. +4. Single-writer constraint honoured (core is the only mutator; document it). +5. Delete: `thread_goals/{store,runtime,continuation}.rs` mechanics, + `task_board.rs`, todo CRUD mechanics, dispatcher executor mechanics + (~3.2k + tests). + +### C3 — Middleware de-duplication (01.1/01.3/06 finish) + +1. Flip `ContextualToolSelectionMiddleware` from shadow to owner (parity logs + are already accumulating); delete `OpenHumanToolExposureShadowMiddleware`, + then `tool_filter.rs` mechanics + `tool_prep.rs` selection half. +2. De-duplicate `UsageRecorded` (single owner: crate event → bridge records + once), then install crate `BudgetMiddleware`/`BudgetLimits`; delete local + `CostBudgetMiddleware` + `turn_subagent_usage.rs` task-local (206) in + favour of `RunTree` rollup. +3. ~~Delete `CacheAlign` + `Microcompact`~~ CORRECTED by C3 execution + (2026-07-03): `CacheAlign` deleted (warn-only, crate cache guard already + installed — commit on `feat/tinyagents-c3-middleware-dedupe`). + `Microcompact` is NOT crate-superseded — tinyagents 1.5.0 has no + tool-result body-clearing equivalent and the local one is live on the + session turn path — moved to the C5 upstream-extraction batch (extract a + crate microcompact first, then delete locally). +4. ~~Adopt `UnknownToolPolicy::Rewrite`~~ CORRECTED by C3 execution: the + sentinel + `UnknownToolRewriteMiddleware` were already deleted in 01.2; + live policy is `UnknownToolPolicy::ReturnToolError`, which preserves the + attempted tool name + args (#4419 UX). Rewrite mode would regress (needs a + catch-all target tool); rationale comment added at `run_policy_for`. Done — + no further action. + Net: `tinyagents/middleware.rs` 2448 → ~1500 (Microcompact stays until C5). + +### C4 — Events/progress projection (05.3) then delete progress_tracing + +1. Journal-backed web-progress projection (`HarnessEventJournal` + + `HarnessStatusStore` + late-attach `replay_from`). +2. Parity vs `SpanCollector` spans; then delete `progress_tracing.rs` + + `progress_tracing/` (~2k incl. tests) — Langfuse rides crate exporters. +3. DomainEvent/AgentProgress become projections (ledger final clause). + +### C5 — Upstream extraction batch (tinyagents PRs, then local deletes) + +In crate-repo PRs, mirroring the NoProgressTracker precedent, in value order: + +1. `multimodal` attachment resolver (~1550) — marker convention stays here. +2. Tool-call dialect layer (`pformat`/parser) (~1900) — enables C1 step 4 to + be a pure delete. +3. Overflow-to-artifact tool-result store (~500). +4. PostTurnHook/StopHook trait machinery (~450); host runtime adapter (~350); + fuzzy tool ranker (~250); ArgRecovery (~150); triage evaluator core + (~800) as a generic "LLM triage node". + Each: crate PR → bump → local adapter shrinks to product residue → delete. + +### C6 — Subagents finish (07) + +1. Absorb `ops/runner.rs` (1212) + `ops/graph.rs` (1039) into named pipeline + nodes (07.1); procedural runner retired. +2. `running_subagents.rs` 1931 → ≤300: status/tombstone persistence fully on + `JsonlTaskStore` + `orchestrate_*` tools (07.2). +3. Steering: crate `SteeringRegistry` owns lanes; split then delete + `run_queue/` (317); `spawn_depth_context.rs` → `RecursionPolicy` (07.3). +4. Replace the four task-local context carriers (`fork_context`, + `sandbox_context`, `task_recency_context`, `turn_attachments_context`, + ~500 lines) with typed fields on the crate execution context (see §3 + coverage sweep); shrink `subagent_runner/types.rs` onto crate + `SubAgentPolicy`/task types. + +### C7 — Remaining gated items + +- 08.3 durable approval interrupts (now easier with 1.4.0 resumable + failures); 08.5 drop the `worktree_action_dir` fallback thread, delete + `worktree_context.rs` remnant wiring. +- 10 CapabilityRegistry projection + fail-closed diagnostics. +- 02.2 `reliable.rs`: unchanged verdict — gated on routing non-turn provider + calls through the crate harness; do not force. +- `ThinkingForwarder`: delete when crate `ModelDelta` grows reasoning + + tool-name-on-start (file upstream issue; sdk-gaps §3). + +## 5. Expected reclaim + +| Phase | Local lines deleted (incl. tests, rough) | +| --------------------- | ---------------------------------------------------------------------------- | +| C1 sessions + dialect | ~11,000 | +| C2 goals/todos | ~4,000 | +| C3 middleware dedupe | ~1,500 | +| C4 progress tracing | ~2,000 | +| C5 upstream batch | ~6,000 | +| C6 subagents | ~4,500 | +| Total | **~29,000** (of ~57k in `agent/` + ~11k adapter + ~9k session/goals modules) | + +## 6. Rules (unchanged) + +Approval/security/sandbox/credential boundaries inviolate; JSON-RPC contracts +stable unless a migration note lands; adapter → proven parity → delete; +explicit `git add `; verify branch before commit. Tests deferred per +workstream slice with a final conformance pass (11). diff --git a/docs/tinyagents-full-migration-plan/HANDOFF-2026-07-03.md b/docs/tinyagents-full-migration-plan/HANDOFF-2026-07-03.md new file mode 100644 index 0000000000..934aab4a89 --- /dev/null +++ b/docs/tinyagents-full-migration-plan/HANDOFF-2026-07-03.md @@ -0,0 +1,151 @@ +# Handoff — TinyAgents migration wave 1 (2026-07-03) + +Context file for picking this work up in a fresh session. Read together with +[`CONTINUATION-2026-07.md`](CONTINUATION-2026-07.md) (the plan this wave +executes) and [`99-deletion-ledger.md`](99-deletion-ledger.md). + +## What this session did + +1. **Audit** — ground-truth audit of post-#4249 main, TinyAgents crate + re-inventory (1.4.0 published: `graph::goals`/`graph::todos`/graph + resilience; 1.5.0: `NoProgressTracker` extracted from our #4389), and a + critical re-review of the deletion ledger's "never delete" list. Product of + that: `CONTINUATION-2026-07.md` (workstreams C0–C7, ~29k-line reclaim) with + a coverage-sweep appendix for previously unreferenced harness files. +2. **Vendored the SDK** — `vendor/tinyagents` git submodule pinned at tag + `v1.5.0`; `[patch.crates-io] tinyagents = { path = ... }` in BOTH Cargo + worlds (root `Cargo.toml`, `app/src-tauri/Cargo.toml`); Dockerfile now + `COPY vendor/ vendor/`; release-production/staging docker jobs run a + targeted `git submodule update --init vendor/tinyagents`. All other + cargo-running CI jobs already checkout `submodules: recursive`; the mobile + crates (`app/src-tauri-mobile`) do NOT depend on the core crate, so their + `submodules: false` stays. Agents can now edit SDK source in-tree and PR + upstream from the submodule (do this in C5). +3. **Executed wave 1** via a 6-agent workflow (run id `wf_df08984c-139`, + scripts under the session dir; all agents completed). +4. `agent-diff-v0.58.7-to-HEAD.diff` at repo root (gitignored) — full diff of + `src/openhuman/agent/` since the last release, for review. + +## Branch map (local; base → children) + +- **`feat/tinyagents-c0-15-baseline`** ← base for everything below; branched + from `docs/tinyagents-migration-continuation` (which holds the plan docs and + branched from upstream/main). + - `c7f287380` bump tinyagents 1.5.0 (+ .diff gitignore) + - `e2f010cba` vendor submodule + [patch.crates-io] both worlds + - `4c2895801` RepeatedToolFailureMiddleware → crate `NoProgressTracker` + driver (in-house identical/varied/hard-reject ladder deleted; Nudge→ + `SteeringCommand::Redirect`, Halt→`HaltSummarySlot`+Pause+reset; 25 + middleware tests green) + - `fbafad2d3` CI docker submodule fixes + - `dd63b3a66` `with_node_retry(RetryPolicy::default().with_max_attempts(1))` + on delegation + spawn-parallel graphs (behavior-preserving seam; raise + attempts later; backoff sleeping off) + - `02be67b06` plan corrections from C3 (below) +- **`feat/tinyagents-c1-dual-writes`** — 04.1 done. Live turns dual-write to + `{workspace}/tinyagents_store/{kv,journal}` (`session.{stem}.messages` + + descriptor upsert, via `session_import/convert.rs` normalization). New + config flag `agent.session_dual_write` (serde default **ON**); + `OPENHUMAN_SESSION_DUAL_WRITE` is a pure **kill switch** (can force off, + never on), read per-turn. Parity test: + `session_import::live_tests` (2 pass). Read path untouched. +- **`feat/tinyagents-c2-goals`** — `thread_goals/crate_adapter.rs` dual-write + mirror into ns `graph.goals` (keys byte-identical to the crate reader, + proven by reading back via `tinyagents::graph::goals::store::get`); + idempotent `migrate_legacy_goals_into_crate_store` (callable, NOT wired to + boot); shadow tool surface flag-gated **OFF**. Legacy store authoritative. +- **`feat/tinyagents-c2-todos`** — `todos/graph_shadow.rs` mirrors boards into + ns `graph.todos` at `/tinyagents_graph_store`; `claim_card` CAS + shadow through the single refactored `todos::ops::claim_card` chokepoint + (`apply_claim` helper), divergence warn-logged; parity note + `C2b-todos-parity.md` (what maps vs product residue). Legacy authoritative. +- **`feat/tinyagents-c3-middleware-dedupe`** — PARTIAL by design: + `CacheAlignMiddleware` deleted (−147 lines; crate cache guard already + installed). See "plan corrections" for the two refusals. +- **`feat/tinyagents-c4-journals`** — restart-stable event ids + (`EventSink::with_stream_id(run_id)` → `{run_id}-evt-{offset}`), run id + minted once via `journal::mint_run_id`; `FileStatusStore` records thread_id + (`HarnessRunStatus::with_thread`) so `list_by_thread` answers; late-attach + replay test reconstructs from the store alone. + +All slice branches were created FROM `feat/tinyagents-c0-15-baseline` +(worktree HEADs were stale; agents rebased themselves correctly). + +## Plan corrections discovered during execution (already committed, 02be67b06) + +- **MicrocompactMiddleware is NOT crate-superseded** — tinyagents 1.5.0 has no + tool-result body-clearing; local one is live at `turn/core.rs` (~:885, + keep_recent=5). Moved to C5: extract into the crate first, then delete. +- **Unknown-tool is already done** — sentinel + rewrite middleware deleted in + 01.2; live policy `UnknownToolPolicy::ReturnToolError` deliberately kept + over `Rewrite` (preserves #4419 attempted-name UX; Rewrite needs a catch-all + target tool). Rationale comment at `run_policy_for`. +- C3's shadow-parity investigation: exposure-shadow parity logs exist but are + not yet asserted divergence-free — flipping + `ContextualToolSelectionMiddleware` to owner still needs a parity-log audit. + +## Known debt / gotchas for the next session + +- **Worktrees + submodule**: fresh worktrees leave `vendor/tinyagents` empty → + cargo fails on the patch path. Always + `git submodule update --init vendor/tinyagents` first. +- Test invocation: `RUST_MIN_STACK=16777216 … --test-threads=1` + (deep sub-agent futures overflow default stacks); `GGML_NATIVE=OFF` for + cargo on Apple Silicon. Full suites were deliberately NOT run — targeted + module tests only (user directive). Integration debt is listed per-slice in + the workflow reports and belongs to the final conformance pass (plan §11). +- C1's `RunContext.stores` session-KV registration has no in-run consumer + until 04.2 (`StoreChatHistory` adoption was deferred as read-path work). +- C4 follow-ups: replay RPC (`agent.run_events`) unexposed; + parent/root run-id lineage still 05.2/05.3. +- The five slice branches will conflict lightly in `tinyagents/middleware.rs` + / `mod.rs` when merged together — merge C0 first, then slices one at a + time, resolving against C0's tree. + +## Wave 2 status (2026-07-03, late) + +All five wave-1 slice branches were MERGED into +`feat/tinyagents-c0-15-baseline` (light conflicts only; fmt commit on top; +50 targeted tests green post-merge) and pushed to PR **#4473**, which now +carries the entire wave-1 scope. + +Wave 2 was launched as a 4-agent workflow but **died on the monthly spend +limit** before any work landed. Its scoped slices (prompts preserved in the +session workflow script `tinyagents-continuation-wave2-*.js`, resumable via +`resumeFromRunId: wf_d6f5d26d-7e4`): + +1. `W2-shadow-reads` — 04.2 phase 2: store-backed shadow reader, compare with + legacy render, log divergence, legacy stays authoritative + (flag `agent.session_shadow_reads`, env kill switch). +2. `W2-budget-dedupe` — single-owner `UsageRecorded` recording (dedupe guard) + → install crate `BudgetMiddleware` observe-only; local `CostBudgetMiddleware` + demoted to divergence-logging shadow; flip criteria documented. +3. `W2-microcompact-upstream` — implement microcompact IN `vendor/tinyagents` + (branch `feat/microcompact-middleware`), push submodule upstream, then swap + OpenHuman to the crate version + delete local (gitlink bump ONLY if the + submodule push succeeded). +4. `W2-replay-rpc` — `openhuman.agent_run_events` (paged, `next_offset`), + `agent_run_status`, `agent_runs_active` controllers over the C4 journal/ + status seams, registry pattern. + +## Merge / PR state + +- Wave-1 execution branches are LOCAL (not pushed) except as noted below. +- C0 PR: see PR link in the section below / `gh pr list --repo + tinyhumansai/openhuman --author @me`. +- Git etiquette (user rules): push to `origin` (senamakel fork), PR against + `upstream` (tinyhumansai) with `--head senamakel:`; explicit + `git add ` only; never commit on main. + +## Suggested next steps (wave 2) + +1. Merge C0 PR; rebase + push + PR the five slice branches (stack on C0). +2. 04.2 shadow reads → read cutover (biggest deletion unlock, ~9k lines gated + on it, incl. dispatcher/parse/pformat). +3. C5 upstream extractions INTO `vendor/tinyagents` (now editable in-tree): + microcompact (new — see corrections), multimodal resolver, dialect layer, + overflow-to-artifact, hooks traits. +4. C3 remainder: UsageRecorded de-dup → crate `BudgetMiddleware` → delete + local `CostBudgetMiddleware`; exposure-shadow parity audit → flip owner. +5. Flip C2 shadows to authoritative once divergence logs are clean; wire the + goals migration helper to boot. diff --git a/docs/tinyagents-full-migration-plan/README.md b/docs/tinyagents-full-migration-plan/README.md index bb0ce36154..dca9d3d835 100644 --- a/docs/tinyagents-full-migration-plan/README.md +++ b/docs/tinyagents-full-migration-plan/README.md @@ -2,6 +2,12 @@ Status: active plan (2026-07-02). Branch: `issue/4249-finish-tinyagents-migration`. +> **2026-07-03 update:** #4249 has landed on main. Remaining work is +> re-planned in [`CONTINUATION-2026-07.md`](CONTINUATION-2026-07.md), which +> supersedes the workstream ordering below and targets tinyagents 1.4/1.5 +> (`graph::goals`, `graph::todos`, `NoProgressTracker`, resumable graph +> failures). + Goal: **hard-migrate** OpenHuman's agent harness onto the `tinyagents` crate as the library for orchestration, caching, tooling, observability, model providers, context management, embeddings, sub-agents, steering, summarization, @@ -15,9 +21,10 @@ goals. Execute a step file end-to-end (code + tests + deletions + commit). ## Key facts superseding older docs -- Current crate is **1.3.0** (published 2026-07-02); the plan targets it — - see the "1.3.0 delta" in `00-baseline.md`. -- `docs/tinyagents-sdk-gaps.md` was refreshed against TinyAgents 1.3.0 and now +- Current crate is **1.5.0**; the plan still records the earlier "1.3.0 delta" + in `00-baseline.md` where those primitives first became available. +- `docs/tinyagents-sdk-gaps.md` was refreshed against TinyAgents 1.3.0 and + should be re-audited after the 1.5.0 bump; it currently tracks only residual gaps. tinyagents 1.2.0-1.3.0 ships `UnknownToolPolicy`, `ToolPolicy` safety metadata + `ToolPolicyMiddleware`, reasoning deltas (`MessageDelta.reasoning`), durable `JsonlTaskStore` + orchestration tools, diff --git a/docs/tinyagents-migration-spec.md b/docs/tinyagents-migration-spec.md index cb09ed5912..74230768c7 100644 --- a/docs/tinyagents-migration-spec.md +++ b/docs/tinyagents-migration-spec.md @@ -6,7 +6,7 @@ TinyAgents source reviewed: `tinyhumansai/tinyagents` `origin/main` at `8f226f1`, crate version `1.1.0`. Refreshed against `tinyhumansai/tinyagents` `main` at `348a0e7dc71a1f9039f3d523a2a384661a7a9acd` after the SDK/docs update. Current OpenHuman dependency in this checkout is -`tinyagents = { version = "1.3", features = ["sqlite"] }`. +`tinyagents = { version = "1.5.0", features = ["sqlite"] }`. OpenHuman already depends on TinyAgents and already routes the live agent turn through `src/openhuman/tinyagents/`. This spec is not a proposal to add @@ -80,7 +80,7 @@ OpenHuman Rust core: Already done or partially done: -- `Cargo.toml` pins `tinyagents = { version = "1.3", features = ["sqlite"] }`. +- `Cargo.toml` pins `tinyagents = { version = "1.5.0", features = ["sqlite"] }`. - `src/openhuman/tinyagents/mod.rs` registers OpenHuman `Provider` and `Tool` adapters on `tinyagents::harness::runtime::AgentHarness`. - `ProviderModel` maps OpenHuman `ChatRequest`/`ChatResponse` into diff --git a/docs/tinyagents-session-migration-design.md b/docs/tinyagents-session-migration-design.md index 0905c862f0..020cf7cd15 100644 --- a/docs/tinyagents-session-migration-design.md +++ b/docs/tinyagents-session-migration-design.md @@ -16,7 +16,7 @@ OpenHuman session key. ## Source inventory (what exists on disk today) -All facts verified against the current checkout (TinyAgents 1.3 pinned with the +All facts verified against the current checkout (TinyAgents 1.5.0 pinned with the `sqlite` feature enabled). ### 1. Transcript JSONL (source of truth) @@ -73,7 +73,7 @@ All facts verified against the current checkout (TinyAgents 1.3 pinned with the toolkit/model/sandbox/action-root selector fields, `status`, `reusable`, inline `latestHistory` message mirror, timestamps. -## Target shape (TinyAgents 1.3 primitives) +## Target shape (TinyAgents 1.3+ primitives) Use the crate's `harness::store` as the substrate — no new storage layer: diff --git a/gitbooks/developing/architecture/agent-harness.md b/gitbooks/developing/architecture/agent-harness.md index 67b11591d1..224be4e932 100644 --- a/gitbooks/developing/architecture/agent-harness.md +++ b/gitbooks/developing/architecture/agent-harness.md @@ -49,7 +49,7 @@ icon: layer-group ## TinyAgents crate: features & compatibility -OpenHuman pins `tinyagents = { version = "1.3", features = ["sqlite"] }` (see [`Cargo.toml`](../../../Cargo.toml)). The rationale, so future upgrades don't silently regress it: +OpenHuman pins `tinyagents = { version = "1.5.0", features = ["sqlite"] }` (see [`Cargo.toml`](../../../Cargo.toml)). The rationale, so future upgrades don't silently regress it: - **OpenHuman-owned providers only.** We do **not** enable any bundled provider feature. OpenHuman owns provider transport, credentials, OAuth, and billing classification, so the live model is always OpenHuman's `Provider` wrapped as [`ProviderModel`](../../../src/openhuman/tinyagents/model.rs) — never an SDK-owned provider client. The `ChatModel` adapter is the seam that replaces feature-gated SDK providers. - **`sqlite` feature enabled with one native sqlite chain.** OpenHuman's root and Tauri Cargo worlds pin `rusqlite = "=0.40.0"` and patch `rusqlite` / `libsqlite3-sys` locally to avoid the upstream `cfg_select!` build break on the current toolchain. Both worlds resolve to a single `libsqlite3-sys v0.38.0` chain. Durable graph checkpoints still run through [`SqlRunLedgerCheckpointer`](../../../src/openhuman/tinyagents/checkpoint.rs) until the migration re-points those rows to the crate checkpointer. @@ -443,7 +443,7 @@ Every agent turn — chat (`harness/session/turn/core.rs`), channel/CLI (`harnes | `mod.rs` / `model.rs` / `tools.rs` / `convert.rs` | `RunPolicy` / `ChatModel` / `Tool` / message adapters (incl. unknown-tool policy and out-of-band reasoning forwarding). | | `observability.rs` | Harness `AgentEvent` → `AgentProgress` + cost; `GraphTracingSink` for graph events. | | `orchestration.rs` | Re-exported `graph::orchestration` task-store types; map-reduce fanout now uses the TinyAgents SDK surface directly. | -| `checkpoint.rs` | `SqlRunLedgerCheckpointer` — a `Checkpointer` over openhuman's SQLite (`graph_checkpoints` table). TinyAgents 1.3 now ships `SqliteCheckpointer`; OpenHuman keeps this adapter until existing checkpoint rows are migrated or expired and schema ownership is settled. | +| `checkpoint.rs` | `SqlRunLedgerCheckpointer` — a `Checkpointer` over openhuman's SQLite (`graph_checkpoints` table). TinyAgents 1.3+ ships `SqliteCheckpointer`; OpenHuman keeps this adapter until existing checkpoint rows are migrated or expired and schema ownership is settled. | | `delegation.rs` | The durable `plan → execute ⇄ review → finalize` delegation graph (production worker wired in `agent_orchestration::delegation`). | **Orchestration on graphs** (`src/openhuman/agent_orchestration/`): @@ -458,7 +458,7 @@ Every agent turn — chat (`harness/session/turn/core.rs`), channel/CLI (`harnes - **Sub-agent build pipeline** (`subagent_runner/`) — definition resolution, archetype tool filtering, provider resolution, narrow prompt building, memory context, worker-thread mirror, handoff cache, checkpoint/resume — stays openhuman-owned. Sub-agents already *execute* on the harness; the crate's generic `SubAgentTool` would discard this pipeline for marginal crate-native depth tracking (openhuman's `spawn_depth_context` already bounds recursion). - **Durable run ledgers** (`workflow_runs`, `agent_teams`, `command_center`, `subagent_sessions`) stay on openhuman SQLite/JSON until their controller projections and restart semantics are mapped onto TinyAgents task/status/journal records. The `agent_teams` race-safe SQL compare-and-swap task claim remains OpenHuman-owned. -> **Note:** TinyAgents 1.3 ships harness store/cache/session primitives (`harness::store` with JSONL append stores, `harness::cache`, `harness::subagent`, lineage-aware status) plus graph task stores and conformance contracts. The plan for migrating the session shell, sub-agent pipeline, and detached-task lifecycle onto those primitives lives in [`docs/tinyagents-harness-migration-audit.md`](../../../docs/tinyagents-harness-migration-audit.md). +> **Note:** TinyAgents 1.3+ ships harness store/cache/session primitives (`harness::store` with JSONL append stores, `harness::cache`, `harness::subagent`, lineage-aware status) plus graph task stores and conformance contracts. The plan for migrating the session shell, sub-agent pipeline, and detached-task lifecycle onto those primitives lives in [`docs/tinyagents-harness-migration-audit.md`](../../../docs/tinyagents-harness-migration-audit.md). ## See also diff --git a/src/openhuman/agent/harness/session/turn/core.rs b/src/openhuman/agent/harness/session/turn/core.rs index 2652981793..a8dedb3aea 100644 --- a/src/openhuman/agent/harness/session/turn/core.rs +++ b/src/openhuman/agent/harness/session/turn/core.rs @@ -873,15 +873,17 @@ impl Agent { // arguments (no child scope, no early-exit tools, graceful cap pause, // per-turn output cap) and runs the context-window summarization step. // Context middlewares sourced from this session's ContextManager: the - // per-tool-result byte cap + payload summarizer (after_tool), the - // cache-align warning and microcompact tool-body clearing (before_model). + // per-tool-result byte cap + payload summarizer (after_tool) and + // microcompact tool-body clearing (before_model). KV-cache-prefix drift + // detection is owned by the crate `PromptCacheGuardMiddleware` (fed by + // `PromptCacheSegmentMiddleware`); the warn-only `CacheAlignMiddleware` + // was deleted in C3. let context_mw = crate::openhuman::tinyagents::TurnContextMiddleware { tool_result_budget_bytes: self.context.tool_result_budget_bytes(), payload_summarizer: self.payload_summarizer.clone(), artifact_store, tokenjuice_compaction_enabled: self.context.compaction_enabled(), tokenjuice_compression: self.tokenjuice_compression, - cache_align: self.context.compaction_enabled(), microcompact_keep_recent: self.context.microcompact_keep_recent(), // Honor the [context].enabled / autocompact_enabled opt-outs: when off, // the summarization middleware is not installed (no summarizer tokens, diff --git a/src/openhuman/agent/harness/session/turn/session_io.rs b/src/openhuman/agent/harness/session/turn/session_io.rs index 4a6f1e9f68..1bd40ef56b 100644 --- a/src/openhuman/agent/harness/session/turn/session_io.rs +++ b/src/openhuman/agent/harness/session/turn/session_io.rs @@ -240,8 +240,9 @@ impl Agent { match transcript::write_transcript(path, messages, &meta, turn_usage) { Ok(()) => { - // Best-effort, non-fatal dual-write into the TinyAgents store, - // behind `OPENHUMAN_SESSION_DUAL_WRITE` (default OFF). Only runs + // Best-effort, non-fatal dual-write into the TinyAgents store. + // Gated by the default-ON session dual-write flag + // (`OPENHUMAN_SESSION_DUAL_WRITE` is a kill switch). Only runs // after the legacy JSONL append above succeeds; the legacy path // is primary and untouched (issue #4249, 04.1). self.maybe_dual_write_session_store(path, messages, &meta, turn_usage); @@ -257,9 +258,10 @@ impl Agent { /// Mirror the just-persisted turn into the TinyAgents session store. /// - /// Additive and gated on the `OPENHUMAN_SESSION_DUAL_WRITE` flag (default - /// OFF): when the flag is off this is a cheap early return — no store handle - /// is constructed and behavior is byte-identical to today. When on, the + /// Additive and gated on the default-ON session dual-write flag + /// (`OPENHUMAN_SESSION_DUAL_WRITE` is a kill switch): when killed this is a + /// cheap early return — no store handle is constructed and behavior is + /// byte-identical to the legacy-only path. When on (the default), the /// store write is fired best-effort on a background task and any error is /// logged (`[session-store]`) and swallowed, so it can never fail or alter a /// chat turn. Records reuse the importer's normalization @@ -274,7 +276,9 @@ impl Agent { ) { use crate::openhuman::session_import::live; - if !live::dual_write_enabled() { + // Config flag (default ON) gates the mirror; the env kill switch can + // still force it off. `self.config` is the effective per-agent config. + if !live::dual_write_enabled(self.config.session_dual_write) { return; } diff --git a/src/openhuman/agent_orchestration/spawn_parallel_graph.rs b/src/openhuman/agent_orchestration/spawn_parallel_graph.rs index b37139c1a4..94b44a24f3 100644 --- a/src/openhuman/agent_orchestration/spawn_parallel_graph.rs +++ b/src/openhuman/agent_orchestration/spawn_parallel_graph.rs @@ -18,6 +18,7 @@ use tinyagents::graph::parallel::{map_reduce, FailurePolicy, ParallelOptions}; use tinyagents::graph::{ ClosureStateReducer, CompiledGraph, GraphBuilder, NodeContext, NodeResult, }; +use tinyagents::harness::retry::RetryPolicy; use tinyagents::harness::workspace::{WorkspaceDescriptor, WorkspaceIsolation}; use tinyagents::{CancellationToken, TinyAgentsError}; @@ -1718,7 +1719,16 @@ async fn run_spawn_parallel_execution_graph( .map_err(|e| format!("spawn_parallel_agents graph compile failed: {e}"))? .with_event_sink(Arc::new( crate::openhuman::tinyagents::observability::GraphTracingSink::new(label), - )); + )) + // Adapter-first landing of the crate-native per-node RetryPolicy + // (tinyagents 1.5.0 `CompiledGraph::with_node_retry`). Conservative: + // `max_attempts(1)` preserves today's single-attempt phase semantics + // exactly (no bespoke retry glue existed on these phases) and backoff + // sleeping stays off (the default). Per-worker fanout resilience is + // owned inside the worker/collect phases, not the phase-graph node loop; + // this wires the crate seam so a future slice can raise the attempt cap + // without re-plumbing. + .with_node_retry(RetryPolicy::default().with_max_attempts(1)); tracing::debug!( parent_session = %parent_session, diff --git a/src/openhuman/config/schema/agent.rs b/src/openhuman/config/schema/agent.rs index 713682e800..8fdd4ffe4f 100644 --- a/src/openhuman/config/schema/agent.rs +++ b/src/openhuman/config/schema/agent.rs @@ -243,6 +243,26 @@ pub struct AgentConfig { /// `OPENHUMAN_TOOL_TIMEOUT_SECS` env var still overrides it when set. #[serde(default = "default_agent_timeout_secs")] pub agent_timeout_secs: u64, + + /// Dual-write each completed session turn into the TinyAgents session + /// store (`{workspace}/tinyagents_store/{kv,journal}`) alongside the + /// legacy `session_raw/*.jsonl` transcript (issue #4249, sessions 04.1). + /// + /// Defaults **ON**: the store has to be populated by live turns so the + /// 04.2 read cutover inherits a complete corpus. The write is additive, + /// best-effort, and non-fatal — a store-write failure never affects the + /// chat turn or the authoritative legacy JSONL. The + /// `OPENHUMAN_SESSION_DUAL_WRITE` env var is a kill switch that overrides + /// this flag in either direction: a falsy value (`0`/`false`/`no`/`off`) + /// forces the dual-write OFF regardless of config; a truthy value forces + /// it ON. See + /// [`crate::openhuman::session_import::live::dual_write_enabled`]. + #[serde(default = "default_session_dual_write")] + pub session_dual_write: bool, +} + +fn default_session_dual_write() -> bool { + true } fn default_tool_result_budget_bytes() -> usize { @@ -374,6 +394,7 @@ impl Default for AgentConfig { channel_permissions: std::collections::HashMap::new(), tool_result_budget_bytes: default_tool_result_budget_bytes(), agent_timeout_secs: default_agent_timeout_secs(), + session_dual_write: default_session_dual_write(), } } } diff --git a/src/openhuman/session_import/live.rs b/src/openhuman/session_import/live.rs index e27aa01253..275f2cbf8b 100644 --- a/src/openhuman/session_import/live.rs +++ b/src/openhuman/session_import/live.rs @@ -1,7 +1,11 @@ //! Live dual-write of new session turns into the TinyAgents store. //! -//! Additive, best-effort, and behind the `OPENHUMAN_SESSION_DUAL_WRITE` -//! environment flag (default **OFF**). The legacy `session_raw/*.jsonl` +//! Additive, best-effort, and gated by the `AgentConfig::session_dual_write` +//! **config flag** which **defaults ON** ([`dual_write_enabled`]); the +//! `OPENHUMAN_SESSION_DUAL_WRITE` env var is a **kill switch** — set it to a +//! falsey value (`0`/`false`/`no`/`off`/`disable`) to force the mirror off +//! regardless of config. This mirrors the `OPENHUMAN_APPROVAL_GATE` +//! default-on-with-kill-switch idiom. The legacy `session_raw/*.jsonl` //! transcript (`session/turn/session_io.rs` → `transcript::write_transcript`) //! stays the primary and authoritative writer; this module mirrors each //! *already-persisted* turn into the same store layout the Phase-1 importer @@ -15,7 +19,7 @@ //! nothing in this module touches the legacy transcript path. use std::path::Path; -use std::sync::OnceLock; +use std::sync::Arc; use anyhow::{Context, Result}; use tinyagents::harness::store::{AppendStore, Store}; @@ -28,29 +32,83 @@ use super::convert::{ use super::ops::{open_session_stores, SessionStores}; use super::types::{DescriptorSource, NS_SESSIONS}; -/// Environment flag gating the live session-store dual-write. Default OFF. +/// Kill-switch env var for the live session-store dual-write. The config flag +/// (`AgentConfig::session_dual_write`) defaults ON; setting this env var to a +/// falsey value forces the mirror OFF regardless of config. See +/// [`dual_write_enabled`]. const DUAL_WRITE_ENV: &str = "OPENHUMAN_SESSION_DUAL_WRITE"; -/// Whether the live session-store dual-write is enabled. +/// Whether the `OPENHUMAN_SESSION_DUAL_WRITE` kill switch is engaged (set to a +/// falsey value). Unset — or any non-falsey value — leaves the mirror driven by +/// the config flag. Read live (not cached) so a config reload / env change is +/// honored on the next turn. +fn kill_switch_engaged() -> bool { + match std::env::var(DUAL_WRITE_ENV) { + Ok(v) => matches!( + v.trim().to_ascii_lowercase().as_str(), + "0" | "false" | "no" | "off" | "disable" | "disabled" + ), + Err(_) => false, + } +} + +/// Store-registry name under which the session KV store is registered on each +/// turn's `RunContext.stores` (issue #4249, 04.1). Slash-free so it round-trips +/// the crate `FileStore` name sanitizer. This is a forward-looking, +/// harness-visible handle to the same `tinyagents_store` KV tree the live +/// dual-write mirrors into; readers stay legacy until 04.2. +pub const TINYAGENTS_SESSION_KV_STORE: &str = "openhuman_sessions"; + +/// Whether the live session-store dual-write is enabled for this turn. +/// +/// `config_enabled` is the `AgentConfig::session_dual_write` flag, which +/// **defaults ON**. The `OPENHUMAN_SESSION_DUAL_WRITE` env var is a pure kill +/// switch: an explicit falsey value (case-insensitive +/// `0`/`false`/`no`/`off`/`disable`/`disabled`) forces the mirror OFF regardless +/// of config; otherwise the config flag wins. Read live (never cached) so a +/// config reload / env change is honored on the next turn. This keeps a clean +/// 04.2 seam (reads can flip independently) while making the mirror the default +/// so new turns land in the store without opt-in. +pub fn dual_write_enabled(config_enabled: bool) -> bool { + let killed = kill_switch_engaged(); + let enabled = config_enabled && !killed; + log::debug!( + "[session-store] dual-write decision config_enabled={config_enabled} kill_switch={killed} enabled={enabled}" + ); + enabled +} + +/// Open the session KV store as an `Arc` for registration on the +/// per-turn `RunContext.stores` under [`TINYAGENTS_SESSION_KV_STORE`], honoring +/// the dual-write flag (config default ON + env kill switch). /// -/// Read **once** from the environment and cached for the process lifetime. -/// Truthy values (case-insensitive): `1`, `true`, `yes`, `on`. Anything else — -/// including an unset variable — is OFF, so default behavior is byte-identical -/// to today (no store handle constructed, no extra writes). -pub fn dual_write_enabled() -> bool { - static ENABLED: OnceLock = OnceLock::new(); - *ENABLED.get_or_init(|| { - let enabled = std::env::var(DUAL_WRITE_ENV) - .map(|v| { - matches!( - v.trim().to_ascii_lowercase().as_str(), - "1" | "true" | "yes" | "on" - ) - }) - .unwrap_or(false); - log::debug!("[session-store] dual-write flag {DUAL_WRITE_ENV} resolved to {enabled}"); - enabled - }) +/// Best-effort: `None` when the dual-write is disabled **or** the config (hence +/// workspace) cannot be resolved. When present it is the exact same +/// `{workspace}/tinyagents_store/kv` `FileStore` the importer and the live +/// dual-write use, so a harness-side reader (04.2+) sees identical records. The +/// journal (`JsonlAppendStore`, an `AppendStore` rather than a `Store`) is not +/// registrable on the `StoreRegistry`; the dual-write opens it directly. +pub async fn session_kv_store() -> Option> { + let cfg = match crate::openhuman::config::Config::load_or_init().await { + Ok(cfg) => cfg, + Err(err) => { + log::warn!("[session-store] cannot resolve config for store registration: {err:#}"); + return None; + } + }; + if !dual_write_enabled(cfg.agent.session_dual_write) { + log::debug!( + "[session-store] dual-write disabled; skipping RunContext session-store registration" + ); + return None; + } + let workspace = cfg.workspace_dir; + let SessionStores { kv, .. } = open_session_stores(&workspace); + log::debug!( + "[session-store] opened session kv store for RunContext.stores workspace={}", + workspace.display() + ); + Some(Arc::new(kv)) } /// Mirror one completed turn's transcript into the TinyAgents store. diff --git a/src/openhuman/session_import/live_tests.rs b/src/openhuman/session_import/live_tests.rs new file mode 100644 index 0000000000..a42b70d9dc --- /dev/null +++ b/src/openhuman/session_import/live_tests.rs @@ -0,0 +1,206 @@ +//! Write-side parity for the live session-store dual-write (issue #4249, 04.1). +//! +//! Drives the two persistence paths — the legacy authoritative JSONL writer +//! (`transcript::write_transcript`) and the live store mirror +//! ([`super::live::write_live_turn`]) — with the *same* completed turn, then +//! asserts the store journal renders byte-for-byte the same +//! [`JournalMessage`]s the importer's parity helper reads back off the legacy +//! JSONL. This proves the two writers stay shape-identical for new turns +//! without depending on the read path (04.2). + +use std::path::Path; + +use tempfile::TempDir; +use tinyagents::harness::store::{AppendStore, FileStore, JsonlAppendStore, Store}; + +use super::convert::{sanitize_store_name, stream_name}; +use super::live::{dual_write_enabled, write_live_turn}; +use super::ops::store_root; +use super::types::{JournalMessage, SessionDescriptor, NS_SESSIONS}; +use crate::openhuman::agent::harness::session::transcript::{ + attach_turn_usage_metadata, read_transcript, write_transcript, MessageUsage, SessionTranscript, + TranscriptMeta, TurnUsage, +}; +use crate::openhuman::inference::provider::{ChatMessage, ToolCall}; + +/// A transcript meta header matching the importer's `native` fixture shape. +fn meta(thread_id: &str) -> TranscriptMeta { + TranscriptMeta { + agent_name: "orchestrator".to_string(), + agent_id: Some("orchestrator".to_string()), + agent_type: Some("root".to_string()), + dispatcher: "native".to_string(), + provider: Some("anthropic".to_string()), + model: Some("claude".to_string()), + created: "2024-01-01T00:00:00Z".to_string(), + updated: "2024-01-01T00:05:00Z".to_string(), + turn_count: 1, + input_tokens: 100, + output_tokens: 50, + cached_input_tokens: 20, + charged_amount_usd: 0.05, + thread_id: Some(thread_id.to_string()), + task_id: None, + } +} + +/// Per-turn usage carrying a native tool call, so tool-call ids are exercised +/// on the parity path (acceptance: "tool-call ids"). +fn turn_usage() -> TurnUsage { + TurnUsage { + provider: "anthropic".to_string(), + model: "claude".to_string(), + usage: MessageUsage { + input: 100, + output: 50, + cached_input: 20, + context_window: 200_000, + cost_usd: 0.05, + }, + ts: "2024-01-01T00:00:01Z".to_string(), + reasoning_content: None, + tool_calls: vec![ToolCall { + id: "tc1".to_string(), + name: "read_file".to_string(), + arguments: "{\"path\":\"x\"}".to_string(), + extra_content: None, + }], + iteration: 1, + } +} + +/// Read the store journal stream back into `JournalMessage`s, mirroring the +/// importer's `journal_readback` helper. +async fn journal_readback(ws: &Path, stream: &str) -> Vec { + let journal = JsonlAppendStore::new(store_root(ws).join("journal")); + journal + .read_from(stream, 0) + .await + .expect("journal read") + .into_iter() + .map(|(_, v)| serde_json::from_value(v).expect("journal record shape")) + .collect() +} + +#[tokio::test] +async fn live_dual_write_matches_legacy_jsonl_render() { + let ws = TempDir::new().expect("tempdir"); + let stem = "1719_orchestrator"; + let jsonl_path = ws.path().join("session_raw").join(format!("{stem}.jsonl")); + + // A user turn + an assistant turn. The base messages carry no usage + // metadata: the legacy writer embeds it from its `turn_usage` argument, + // exactly as `persist_session_transcript` does in production. + let base_messages = vec![ChatMessage::user("hi"), ChatMessage::assistant("done")]; + let meta = meta("t-root"); + let usage = turn_usage(); + + // (1) Legacy authoritative write — the primary persistence path. + write_transcript(&jsonl_path, &base_messages, &meta, Some(&usage)).expect("legacy write"); + + // (2) Live dual-write — replicate `session_io`'s construction: attach the + // turn usage to the last assistant message, then mirror into the store. + let mut live_messages = base_messages.clone(); + let last_assistant = live_messages + .iter() + .rposition(|m| m.role == "assistant") + .expect("assistant message present"); + attach_turn_usage_metadata(&mut live_messages[last_assistant], &usage); + let transcript = SessionTranscript { + meta: meta.clone(), + messages: live_messages, + }; + write_live_turn(ws.path(), stem, &transcript) + .await + .expect("live dual-write"); + + // Parity: the store journal must equal the importer's read-back of the + // legacy JSONL, field for field (including reconstructed + // `openhuman_turn_usage` metadata and the tool-call id). + let expected: Vec = read_transcript(&jsonl_path) + .expect("read legacy transcript") + .messages + .iter() + .map(JournalMessage::from) + .collect(); + let actual = journal_readback(ws.path(), &stream_name(stem)).await; + assert_eq!( + actual, expected, + "live store stream diverges from the legacy JSONL render" + ); + + // The assistant record must carry the tool-call id via reconstructed usage. + let assistant = actual + .iter() + .find(|m| m.role == "assistant") + .expect("assistant record"); + let tool_id = assistant + .extra_metadata + .as_ref() + .and_then(|m| m.get("openhuman_turn_usage")) + .and_then(|u| u.get("tool_calls")) + .and_then(|t| t.get(0)) + .and_then(|c| c.get("id")) + .and_then(|id| id.as_str()); + assert_eq!(tool_id, Some("tc1"), "tool-call id lost on the store path"); + + // The session descriptor is upserted under the sanitized stem with the + // stem's thread id and journal stream, matching the importer's projection. + let kv = FileStore::new(store_root(ws.path()).join("kv")); + let desc_value = kv + .get(NS_SESSIONS, &sanitize_store_name(stem)) + .await + .expect("kv get") + .expect("descriptor present after live write"); + let desc: SessionDescriptor = serde_json::from_value(desc_value).expect("descriptor shape"); + assert_eq!(desc.session_key, stem); + assert_eq!(desc.thread_id, "t-root"); + assert!(!desc.thread_id_synthesized); + assert_eq!(desc.stream, stream_name(stem)); + assert_eq!(desc.dispatcher, "native"); + assert_eq!(desc.provider.as_deref(), Some("anthropic")); + assert_eq!(desc.model.as_deref(), Some("claude")); +} + +/// The dual-write is driven by the `AgentConfig::session_dual_write` config +/// flag (default ON) with the `OPENHUMAN_SESSION_DUAL_WRITE` env var as a pure +/// kill switch. This exercises the decision matrix directly. Env mutation is +/// process-global, so all assertions live in one serial test and the var is +/// restored on exit; no other test reads this var. +#[test] +fn config_flag_and_env_kill_switch() { + const ENV: &str = "OPENHUMAN_SESSION_DUAL_WRITE"; + let prior = std::env::var(ENV).ok(); + + // Config OFF disables regardless of env. + std::env::remove_var(ENV); + assert!(!dual_write_enabled(false), "config off disables"); + + // Config ON (the default) enables when the env is unset. + assert!(dual_write_enabled(true), "config on + no env enables"); + + // A falsey env value is the kill switch: forces OFF even with config ON. + for killed in ["0", "false", "no", "off", "disable", "disabled", "OFF"] { + std::env::set_var(ENV, killed); + assert!( + !dual_write_enabled(true), + "kill switch value {killed:?} must force off" + ); + } + + // A non-falsey env value does not force on: config still governs. + std::env::set_var(ENV, "1"); + assert!( + dual_write_enabled(true), + "non-falsey env leaves config ON on" + ); + assert!( + !dual_write_enabled(false), + "non-falsey env does not force config-off on" + ); + + match prior { + Some(v) => std::env::set_var(ENV, v), + None => std::env::remove_var(ENV), + } +} diff --git a/src/openhuman/session_import/mod.rs b/src/openhuman/session_import/mod.rs index 4a295036ea..94a693abca 100644 --- a/src/openhuman/session_import/mod.rs +++ b/src/openhuman/session_import/mod.rs @@ -21,5 +21,7 @@ pub use schemas::{ }; pub use types::{ImportOptions, ImportSummary}; +#[cfg(test)] +mod live_tests; #[cfg(test)] mod ops_tests; diff --git a/src/openhuman/thread_goals/crate_adapter.rs b/src/openhuman/thread_goals/crate_adapter.rs new file mode 100644 index 0000000000..66cbbe6fcb --- /dev/null +++ b/src/openhuman/thread_goals/crate_adapter.rs @@ -0,0 +1,510 @@ +//! Adapter seam: mirror OpenHuman `thread_goals` onto the tinyagents +//! `graph::goals` crate store (issue #4249, plan §C2). +//! +//! **Adapter-first, dual-write.** The legacy per-thread file-JSON store +//! ([`super::store`]) stays **authoritative for reads**; this module *also* +//! mirrors every goal mutation into the crate's `graph::goals` store so a later +//! slice can flip reads over to the crate with zero data migration. The mirror +//! is a **faithful copy**: the crate row carries the *same* `goal_id`, +//! timestamps, and counters as the legacy row (we `put` the converted value +//! directly rather than calling the crate's `store::set`, which would re-mint a +//! `goal-` id and reset counters). +//! +//! Persistence target: the crate [`Store`] rooted at the same workspace KV tree +//! as the 04-sessions journal (`{workspace}/tinyagents_store/kv`), namespace +//! [`GOALS_NAMESPACE`] (`graph.goals`), keyed by `hex(thread_id)` — byte-for-byte +//! the key the crate's own `graph::goals::store` computes, so the crate reader +//! finds exactly what we wrote. +//! +//! # Single-writer constraint +//! +//! The crate `Store` has **no compare-and-set and no cross-key transaction** +//! (see the crate `graph::goals::store` docs). Its per-thread atomicity is a +//! *process-local* async mutex, and the legacy store uses a process-wide mutex. +//! Neither is safe across processes. This is acceptable here because **the +//! OpenHuman core is the single writer** of thread goals — RPC handlers, agent +//! tools, and the heartbeat continuation runtime all run inside one core +//! process. Do not add a second mutating writer (a sidecar, a second core, a +//! cron in another process) without introducing a real CAS first. +//! +//! # Shadow mode +//! +//! The tool/host surface mirror is gated OFF by default behind +//! [`crate_goals_shadow_enabled`] (`OPENHUMAN_THREAD_GOALS_CRATE_SHADOW`). When +//! ON it acts on the legacy result and merely *logs* any crate-vs-legacy +//! divergence — it never changes what a caller observes. + +use std::path::Path; +use std::sync::Arc; + +use tinyagents::graph::goals::store::GOALS_NAMESPACE; +use tinyagents::graph::goals::{ThreadGoal as CrateThreadGoal, ThreadGoalStatus as CrateStatus}; +use tinyagents::harness::store::Store; + +use super::types::{ThreadGoal, ThreadGoalStatus}; +use crate::openhuman::session_import::ops::open_session_stores; + +/// Env flag gating the crate-goals **shadow** mirror on the tool/host surface. +/// Defaults **OFF**; any of `1`/`true`/`yes`/`on` (case-insensitive) enables it. +const SHADOW_ENV: &str = "OPENHUMAN_THREAD_GOALS_CRATE_SHADOW"; + +/// Whether the crate-goals shadow mirror is enabled (defaults OFF). +/// +/// Shadow mode mirrors legacy mutations into the crate store and logs any +/// divergence; it never changes the caller-observed (legacy) result. +pub fn crate_goals_shadow_enabled() -> bool { + std::env::var(SHADOW_ENV) + .map(|v| { + let v = v.trim().to_ascii_lowercase(); + matches!(v.as_str(), "1" | "true" | "yes" | "on") + }) + .unwrap_or(false) +} + +/// Open the crate [`Store`] handle used for the goals mirror, rooted at the +/// shared workspace KV tree (`{workspace}/tinyagents_store/kv`). Same layout the +/// 04-sessions journal + status store use, so everything lives under one tree. +pub(crate) fn crate_goals_store(workspace_dir: &Path) -> Arc { + Arc::new(open_session_stores(workspace_dir).kv) +} + +/// The crate store key for a thread's goal: lowercase hex of the (trimmed) +/// thread-id bytes. This MUST match the crate's private `graph::goals::store` +/// key function exactly so the crate reader resolves our mirrored value. +fn goal_key(thread_id: &str) -> String { + thread_id + .trim() + .as_bytes() + .iter() + .map(|b| format!("{b:02x}")) + .collect() +} + +/// Map a legacy [`ThreadGoalStatus`] onto the crate [`CrateStatus`]. The two +/// enums are 1:1 (Active/Paused/BudgetLimited/Complete) — this is the mapping +/// the parity tests pin. +pub(crate) fn to_crate_status(status: ThreadGoalStatus) -> CrateStatus { + match status { + ThreadGoalStatus::Active => CrateStatus::Active, + ThreadGoalStatus::Paused => CrateStatus::Paused, + ThreadGoalStatus::BudgetLimited => CrateStatus::BudgetLimited, + ThreadGoalStatus::Complete => CrateStatus::Complete, + } +} + +/// Map a crate [`CrateStatus`] back onto the legacy [`ThreadGoalStatus`] (the +/// inverse of [`to_crate_status`]). +pub(crate) fn from_crate_status(status: CrateStatus) -> ThreadGoalStatus { + match status { + CrateStatus::Active => ThreadGoalStatus::Active, + CrateStatus::Paused => ThreadGoalStatus::Paused, + CrateStatus::BudgetLimited => ThreadGoalStatus::BudgetLimited, + CrateStatus::Complete => ThreadGoalStatus::Complete, + } +} + +/// Convert a legacy [`ThreadGoal`] into the crate [`CrateThreadGoal`], +/// preserving every field verbatim (id, objective, status, budget/usage +/// counters, timestamps, continuation flag). A **faithful** projection — no +/// re-minting, no counter reset. +pub(crate) fn to_crate_goal(goal: &ThreadGoal) -> CrateThreadGoal { + CrateThreadGoal { + thread_id: goal.thread_id.clone(), + goal_id: goal.goal_id.clone(), + objective: goal.objective.clone(), + status: to_crate_status(goal.status), + token_budget: goal.token_budget, + tokens_used: goal.tokens_used, + time_used_seconds: goal.time_used_seconds, + created_at_ms: goal.created_at_ms, + updated_at_ms: goal.updated_at_ms, + continuation_suppressed: goal.continuation_suppressed, + } +} + +/// Convert a crate [`CrateThreadGoal`] back into a legacy [`ThreadGoal`] (the +/// inverse of [`to_crate_goal`]), used by the shadow-divergence comparison. +pub(crate) fn from_crate_goal(goal: &CrateThreadGoal) -> ThreadGoal { + ThreadGoal { + thread_id: goal.thread_id.clone(), + goal_id: goal.goal_id.clone(), + objective: goal.objective.clone(), + status: from_crate_status(goal.status), + token_budget: goal.token_budget, + tokens_used: goal.tokens_used, + time_used_seconds: goal.time_used_seconds, + created_at_ms: goal.created_at_ms, + updated_at_ms: goal.updated_at_ms, + continuation_suppressed: goal.continuation_suppressed, + } +} + +/// Write the faithful crate mirror of `goal` into `store` (ns `graph.goals`, +/// key `hex(thread_id)`). Overwrites any prior mirror; idempotent for an +/// unchanged value. +pub(crate) async fn put_mirror(store: &Arc, goal: &ThreadGoal) -> Result<(), String> { + let crate_goal = to_crate_goal(goal); + let value = + serde_json::to_value(&crate_goal).map_err(|e| format!("serialize crate goal: {e}"))?; + store + .put(GOALS_NAMESPACE, &goal_key(&goal.thread_id), value) + .await + .map_err(|e| format!("mirror thread goal into {GOALS_NAMESPACE}: {e}")) +} + +/// Read the current crate mirror for `thread_id`, or `None`. Skips a mirror that +/// fails to decode (treated as absent) so a legacy/corrupt row can't wedge the +/// shadow path. +pub(crate) async fn get_mirror( + store: &Arc, + thread_id: &str, +) -> Result, String> { + let value = store + .get(GOALS_NAMESPACE, &goal_key(thread_id)) + .await + .map_err(|e| format!("read crate goal mirror: {e}"))?; + match value { + Some(v) => match serde_json::from_value::(v) { + Ok(crate_goal) => Ok(Some(from_crate_goal(&crate_goal))), + Err(e) => { + tracing::debug!( + thread_id = %thread_id, + error = %e, + "[thread_goals][crate-shadow] undecodable crate mirror; treating as absent" + ); + Ok(None) + } + }, + None => Ok(None), + } +} + +/// Delete the crate mirror for `thread_id`. No-op when absent (matches the +/// crate/legacy clear contract). +pub(crate) async fn delete_mirror(store: &Arc, thread_id: &str) -> Result<(), String> { + store + .delete(GOALS_NAMESPACE, &goal_key(thread_id)) + .await + .map_err(|e| format!("delete crate goal mirror: {e}")) +} + +// ── Shadow-mode surface (flag-gated; acts on legacy, logs divergence) ───────── + +/// Shadow-mirror a legacy mutation result into the crate store, logging any +/// crate-vs-legacy divergence. No-op (and no store I/O) when the shadow flag is +/// OFF. Best-effort: a mirror error is logged, never propagated — the shadow +/// path must never change caller-observed behavior. +pub async fn shadow_mirror_goal(workspace_dir: &Path, legacy_goal: &ThreadGoal) { + if !crate_goals_shadow_enabled() { + return; + } + let store = crate_goals_store(workspace_dir); + // Log divergence against the pre-write crate state (status/counter drift). + match get_mirror(&store, &legacy_goal.thread_id).await { + Ok(Some(prior)) if prior != *legacy_goal => { + tracing::debug!( + thread_id = %legacy_goal.thread_id, + goal_id = %legacy_goal.goal_id, + crate_status = prior.status.as_str(), + legacy_status = legacy_goal.status.as_str(), + crate_tokens = prior.tokens_used, + legacy_tokens = legacy_goal.tokens_used, + "[thread_goals][crate-shadow] mirror diverges from prior crate row; overwriting with legacy" + ); + } + Ok(_) => {} + Err(e) => { + tracing::debug!(error = %e, "[thread_goals][crate-shadow] prior-read failed"); + } + } + if let Err(e) = put_mirror(&store, legacy_goal).await { + tracing::debug!( + thread_id = %legacy_goal.thread_id, + error = %e, + "[thread_goals][crate-shadow] mirror write failed (ignored)" + ); + } else { + tracing::debug!( + thread_id = %legacy_goal.thread_id, + goal_id = %legacy_goal.goal_id, + status = legacy_goal.status.as_str(), + "[thread_goals][crate-shadow] mirrored goal into graph.goals" + ); + } +} + +/// Shadow-mirror a legacy clear into the crate store. No-op when the shadow flag +/// is OFF. Best-effort (errors logged, never propagated). +pub async fn shadow_mirror_clear(workspace_dir: &Path, thread_id: &str) { + if !crate_goals_shadow_enabled() { + return; + } + let store = crate_goals_store(workspace_dir); + if let Err(e) = delete_mirror(&store, thread_id).await { + tracing::debug!( + thread_id = %thread_id, + error = %e, + "[thread_goals][crate-shadow] mirror clear failed (ignored)" + ); + } else { + tracing::debug!( + thread_id = %thread_id, + "[thread_goals][crate-shadow] cleared goal mirror in graph.goals" + ); + } +} + +// ── One-time migration helper (callable, logged, NOT wired to boot) ─────────── + +/// Outcome of a [`migrate_legacy_goals_into_crate_store`] run. +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub struct GoalMigrationReport { + /// Legacy goal rows examined. + pub total: usize, + /// Rows written into the crate store (absent or divergent mirror). + pub copied: usize, + /// Rows already present in the crate store with an identical value. + pub skipped: usize, +} + +/// Copy every existing legacy thread-goal row into the crate `graph.goals` +/// store. **Idempotent**: a row whose crate mirror already equals the legacy +/// value is skipped, so re-running does no writes and reports everything under +/// `skipped`. +/// +/// Callable + logged; deliberately **not wired into boot** in this slice (a +/// later slice schedules it behind a one-shot marker, mirroring the +/// session-import global marker). Honors the single-writer constraint: run it +/// only inside the core process. +pub async fn migrate_legacy_goals_into_crate_store( + workspace_dir: &Path, +) -> Result { + let legacy = super::store::list_all(workspace_dir).await?; + let store = crate_goals_store(workspace_dir); + let mut report = GoalMigrationReport { + total: legacy.len(), + ..Default::default() + }; + tracing::info!( + workspace = %workspace_dir.display(), + total = report.total, + "[thread_goals][crate-migrate] start copy legacy goals → graph.goals" + ); + for goal in &legacy { + match get_mirror(&store, &goal.thread_id).await { + Ok(Some(existing)) if existing == *goal => { + report.skipped += 1; + tracing::debug!( + thread_id = %goal.thread_id, + goal_id = %goal.goal_id, + "[thread_goals][crate-migrate] skip (already mirrored)" + ); + continue; + } + Ok(_) => {} + Err(e) => { + // Read failure → attempt the write anyway (fail-forward copy). + tracing::debug!( + thread_id = %goal.thread_id, + error = %e, + "[thread_goals][crate-migrate] mirror pre-read failed; copying anyway" + ); + } + } + put_mirror(&store, goal).await?; + report.copied += 1; + tracing::debug!( + thread_id = %goal.thread_id, + goal_id = %goal.goal_id, + "[thread_goals][crate-migrate] copied" + ); + } + tracing::info!( + total = report.total, + copied = report.copied, + skipped = report.skipped, + "[thread_goals][crate-migrate] done" + ); + Ok(report) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::openhuman::thread_goals::store as legacy_store; + + fn sample_goal(status: ThreadGoalStatus) -> ThreadGoal { + ThreadGoal { + thread_id: "thread-α".into(), + goal_id: "goal-uuid-1".into(), + objective: "ship the migration".into(), + status, + token_budget: Some(5_000), + tokens_used: 1_234, + time_used_seconds: 42, + created_at_ms: 1_000, + updated_at_ms: 2_000, + continuation_suppressed: true, + } + } + + #[test] + fn status_mapping_is_bijective_across_all_variants() { + for status in [ + ThreadGoalStatus::Active, + ThreadGoalStatus::Paused, + ThreadGoalStatus::BudgetLimited, + ThreadGoalStatus::Complete, + ] { + let round = from_crate_status(to_crate_status(status)); + assert_eq!(round, status, "status round-trip must be identity"); + } + // Pin the exact crate labels the mapping produces. + assert_eq!(to_crate_status(ThreadGoalStatus::Active).as_str(), "active"); + assert_eq!(to_crate_status(ThreadGoalStatus::Paused).as_str(), "paused"); + assert_eq!( + to_crate_status(ThreadGoalStatus::BudgetLimited).as_str(), + "budget_limited" + ); + assert_eq!( + to_crate_status(ThreadGoalStatus::Complete).as_str(), + "complete" + ); + } + + #[test] + fn goal_mapping_preserves_every_field_and_completion_contract() { + // Completion contract: Complete + continuation_suppressed carries through. + let g = sample_goal(ThreadGoalStatus::Complete); + let crate_goal = to_crate_goal(&g); + assert_eq!(crate_goal.thread_id, g.thread_id); + assert_eq!( + crate_goal.goal_id, g.goal_id, + "goal_id preserved (no re-mint)" + ); + assert_eq!(crate_goal.objective, g.objective); + assert_eq!(crate_goal.status, CrateStatus::Complete); + assert_eq!(crate_goal.token_budget, g.token_budget, "budget preserved"); + assert_eq!(crate_goal.tokens_used, g.tokens_used, "usage preserved"); + assert_eq!(crate_goal.time_used_seconds, g.time_used_seconds); + assert_eq!(crate_goal.created_at_ms, g.created_at_ms); + assert_eq!(crate_goal.updated_at_ms, g.updated_at_ms); + assert!( + crate_goal.continuation_suppressed, + "completion suppresses continuation" + ); + // Full round-trip identity. + assert_eq!(from_crate_goal(&crate_goal), g); + } + + #[test] + fn budget_limited_maps_and_over_budget_carries() { + let mut g = sample_goal(ThreadGoalStatus::BudgetLimited); + g.tokens_used = 6_000; // over the 5_000 budget + let crate_goal = to_crate_goal(&g); + assert_eq!(crate_goal.status, CrateStatus::BudgetLimited); + assert!(crate_goal.over_budget(), "over-budget invariant carries"); + assert_eq!(crate_goal.budget_remaining(), Some(0)); + } + + #[tokio::test] + async fn put_get_delete_mirror_round_trip() { + let tmp = tempfile::tempdir().unwrap(); + let store = crate_goals_store(tmp.path()); + let g = sample_goal(ThreadGoalStatus::Active); + + assert!(get_mirror(&store, &g.thread_id).await.unwrap().is_none()); + put_mirror(&store, &g).await.unwrap(); + let read = get_mirror(&store, &g.thread_id).await.unwrap().unwrap(); + assert_eq!(read, g, "mirror round-trips the exact legacy value"); + + delete_mirror(&store, &g.thread_id).await.unwrap(); + assert!(get_mirror(&store, &g.thread_id).await.unwrap().is_none()); + // Delete is idempotent (no-op when absent). + delete_mirror(&store, &g.thread_id).await.unwrap(); + } + + #[tokio::test] + async fn crate_reader_resolves_the_mirrored_key() { + // Proves the key/namespace we write matches what the crate's own + // `graph::goals::store` reader computes — the whole point of the mirror. + let tmp = tempfile::tempdir().unwrap(); + let store = crate_goals_store(tmp.path()); + let g = sample_goal(ThreadGoalStatus::Paused); + put_mirror(&store, &g).await.unwrap(); + + let via_crate = tinyagents::graph::goals::store::get(&store, &g.thread_id) + .await + .unwrap() + .expect("crate reader finds the mirrored row"); + assert_eq!(via_crate.goal_id, g.goal_id); + assert_eq!(via_crate.status, CrateStatus::Paused); + assert_eq!(via_crate.tokens_used, g.tokens_used); + } + + #[tokio::test] + async fn migration_copies_then_is_idempotent() { + let tmp = tempfile::tempdir().unwrap(); + let dir = tmp.path(); + // Seed two legacy goals via the authoritative legacy store. + legacy_store::set(dir, "t1", "objective one", Some(1_000)) + .await + .unwrap(); + let g2 = legacy_store::set(dir, "t2", "objective two", None) + .await + .unwrap(); + legacy_store::account_usage(dir, "t2", &g2.goal_id, 50, 3) + .await + .unwrap(); + + // First run copies both. + let r1 = migrate_legacy_goals_into_crate_store(dir).await.unwrap(); + assert_eq!(r1.total, 2); + assert_eq!(r1.copied, 2); + assert_eq!(r1.skipped, 0); + + // Crate rows now match legacy rows exactly. + let store = crate_goals_store(dir); + let m2 = get_mirror(&store, "t2").await.unwrap().unwrap(); + assert_eq!(m2.tokens_used, 50); + assert_eq!(m2.objective, "objective two"); + + // Second run is a no-op (idempotent) — everything already mirrored. + let r2 = migrate_legacy_goals_into_crate_store(dir).await.unwrap(); + assert_eq!(r2.total, 2); + assert_eq!(r2.copied, 0, "idempotent: nothing re-copied"); + assert_eq!(r2.skipped, 2); + } + + #[tokio::test] + async fn migration_recopies_a_diverged_row() { + let tmp = tempfile::tempdir().unwrap(); + let dir = tmp.path(); + let g = legacy_store::set(dir, "t", "obj", None).await.unwrap(); + migrate_legacy_goals_into_crate_store(dir).await.unwrap(); + + // Legacy advances (usage accounted) → crate mirror is now stale. + legacy_store::account_usage(dir, "t", &g.goal_id, 99, 1) + .await + .unwrap(); + + let r = migrate_legacy_goals_into_crate_store(dir).await.unwrap(); + assert_eq!(r.copied, 1, "diverged row re-copied"); + assert_eq!(r.skipped, 0); + let store = crate_goals_store(dir); + assert_eq!( + get_mirror(&store, "t").await.unwrap().unwrap().tokens_used, + 99 + ); + } + + #[test] + fn shadow_flag_defaults_off() { + // Not asserting env mutation (process-global); just the default parse. + // An unset/empty value must read as OFF. + assert!(!matches!( + "".trim().to_ascii_lowercase().as_str(), + "1" | "true" | "yes" | "on" + )); + } +} diff --git a/src/openhuman/thread_goals/mod.rs b/src/openhuman/thread_goals/mod.rs index 5ec66b92f6..15b1cf335f 100644 --- a/src/openhuman/thread_goals/mod.rs +++ b/src/openhuman/thread_goals/mod.rs @@ -21,6 +21,7 @@ //! [`store::set_if_absent`]). pub mod continuation; +pub mod crate_adapter; pub mod ops; pub mod runtime; mod schemas; diff --git a/src/openhuman/thread_goals/ops.rs b/src/openhuman/thread_goals/ops.rs index 9a2b0bba77..9784d76d5a 100644 --- a/src/openhuman/thread_goals/ops.rs +++ b/src/openhuman/thread_goals/ops.rs @@ -57,6 +57,7 @@ pub async fn set( log::debug!("[thread_goals] rpc=set thread_id={thread_id}"); let goal = store::set(workspace_dir, thread_id, objective, token_budget).await?; emit_updated(&goal); + super::crate_adapter::shadow_mirror_goal(workspace_dir, &goal).await; Ok(RpcOutcome::single_log( GoalEnvelope { goal: Some(goal.clone()), @@ -77,6 +78,7 @@ pub async fn complete( log::debug!("[thread_goals] rpc=complete thread_id={thread_id}"); let goal = store::complete(workspace_dir, thread_id).await?; emit_updated(&goal); + super::crate_adapter::shadow_mirror_goal(workspace_dir, &goal).await; Ok(RpcOutcome::single_log( GoalEnvelope { goal: Some(goal.clone()), @@ -93,6 +95,7 @@ pub async fn pause( log::debug!("[thread_goals] rpc=pause thread_id={thread_id}"); let goal = store::pause(workspace_dir, thread_id).await?; emit_updated(&goal); + super::crate_adapter::shadow_mirror_goal(workspace_dir, &goal).await; Ok(RpcOutcome::single_log( GoalEnvelope { goal: Some(goal.clone()), @@ -109,6 +112,7 @@ pub async fn resume( log::debug!("[thread_goals] rpc=resume thread_id={thread_id}"); let goal = store::resume(workspace_dir, thread_id).await?; emit_updated(&goal); + super::crate_adapter::shadow_mirror_goal(workspace_dir, &goal).await; Ok(RpcOutcome::single_log( GoalEnvelope { goal: Some(goal.clone()), @@ -129,6 +133,8 @@ pub async fn clear( thread_id: thread_id.to_string(), }); } + // Shadow: mirror the clear into the crate graph.goals store (flag-gated OFF). + super::crate_adapter::shadow_mirror_clear(workspace_dir, thread_id).await; Ok(RpcOutcome::single_log( ClearResult { removed }, format!("cleared thread goal (removed={removed})"), diff --git a/src/openhuman/thread_goals/tools.rs b/src/openhuman/thread_goals/tools.rs index 73fc4149c1..a1ad843d8f 100644 --- a/src/openhuman/thread_goals/tools.rs +++ b/src/openhuman/thread_goals/tools.rs @@ -156,6 +156,9 @@ impl Tool for GoalSetTool { status: goal.status.as_str().to_string(), }, ); + // Shadow: mirror into the crate graph.goals store (flag-gated OFF; + // acts on legacy, logs divergence). Best-effort, never fatal. + super::crate_adapter::shadow_mirror_goal(&self.workspace_dir, &goal).await; Ok(ToolResult::success(format!( "Goal set.\n{}", render_goal(&goal) @@ -212,6 +215,8 @@ impl Tool for GoalCompleteTool { status: goal.status.as_str().to_string(), }, ); + // Shadow: mirror into the crate graph.goals store (flag-gated OFF). + super::crate_adapter::shadow_mirror_goal(&self.workspace_dir, &goal).await; Ok(ToolResult::success(format!( "Goal marked complete.\n{}", render_goal(&goal) diff --git a/src/openhuman/tinyagents/delegation.rs b/src/openhuman/tinyagents/delegation.rs index a3feefb85d..9654e93278 100644 --- a/src/openhuman/tinyagents/delegation.rs +++ b/src/openhuman/tinyagents/delegation.rs @@ -39,6 +39,7 @@ use tinyagents::graph::ClosureStateReducer; use tinyagents::graph::{ Command, CompiledGraph, GraphBuilder, Interrupt, NodeContext, NodeResult, END, }; +use tinyagents::harness::retry::RetryPolicy; use tinyagents::CancellationToken; /// Which stage a delegation node is asking the injected worker to run. @@ -605,7 +606,15 @@ where max_visits_per_node: Some(max_revisions + 2), max_total_steps: (max_revisions + 1) * 4 + 8, ..RecursionPolicy::default() - }); + }) + // Adapter-first landing of the crate-native per-node RetryPolicy + // (tinyagents 1.5.0 `CompiledGraph::with_node_retry`). Conservative: + // `max_attempts(1)` preserves today's single-attempt semantics exactly + // (no bespoke retry glue existed here) and backoff sleeping stays off + // (the default), so a transient node-handler failure surfaces as it does + // today. This wires the seam so raising the attempt cap / enabling + // backoff is a one-line, gated follow-up rather than a rewrite. + .with_node_retry(RetryPolicy::default().with_max_attempts(1)); Ok(graph) } diff --git a/src/openhuman/tinyagents/journal.rs b/src/openhuman/tinyagents/journal.rs index 41def43ee9..c7b1552ec7 100644 --- a/src/openhuman/tinyagents/journal.rs +++ b/src/openhuman/tinyagents/journal.rs @@ -25,14 +25,25 @@ //! sinks here) and its records pass through a [`RedactingSink`] so process //! credentials are masked before anything is persisted. //! +//! ## Stable event ids (05.1) +//! +//! The run [`EventSink`] is seeded by the caller with +//! [`EventSink::with_stream_id`]`(run_id)` (see [`mint_run_id`]), so every +//! persisted observation carries a restart-stable `event_id` of the form +//! `{run_id}-evt-{offset}`. That is the id a late-attaching replay reader +//! reconstructs the timeline from — the same `(stream_id, offset)` always mints +//! the same id, and two runs never collide even if both restart their offset +//! counter at zero. +//! //! ## Follow-ups (not in this slice) //! //! - A replay RPC (`agent.run_events`?) that surfaces [`read_run_events`] / //! [`read_run_status`] to the desktop for mid-run reconnect (05.x). -//! - Sub-agent / graph run lineage (`parent_run_id` / `root_run_id` threading) -//! and per-thread status (`thread_id`) — wired in 05.2/05.3. -//! - Seeding the run [`EventSink`] with `with_stream_id(run_id)` for -//! restart-stable event ids. +//! - Full sub-agent / graph run lineage (`parent_run_id` / `root_run_id` +//! threading) — wired in 05.2/05.3. This slice threads `thread_id` (from the +//! sub-agent task scope) so [`FileStatusStore::list_by_thread`] answers. +//! +//! [`EventSink::with_stream_id`]: tinyagents::harness::events::EventSink::with_stream_id use std::path::PathBuf; use std::sync::{Arc, Mutex}; @@ -41,7 +52,7 @@ use async_trait::async_trait; use tinyagents::error::Result as TaResult; use tinyagents::harness::events::{EventSink, HarnessRunStatus}; -use tinyagents::harness::ids::{ComponentId, HarnessPhase, RunId}; +use tinyagents::harness::ids::{ComponentId, HarnessPhase, RunId, ThreadId}; use tinyagents::harness::observability::{ AgentObservation, FanOutSink, HarnessEventJournal, HarnessStatusStore, JournalSink, RedactingSink, StoreEventJournal, @@ -55,10 +66,18 @@ use crate::openhuman::session_import::ops::open_session_stores; /// it round-trips the crate [`FileStore`] name sanitizer. const STATUS_NS: &str = "run_status"; -/// Mints a fresh, slash-free, process-unique run id (`run.<32-hex>`), used both -/// as the journal stream key and the status-store key. The `simple()` uuid form -/// (no hyphens) keeps the id inside the crate store's allowed-character set. -fn new_run_id() -> RunId { +/// Mints a fresh, slash-free, process-unique run id (`run.<32-hex>`), used three +/// ways for one turn: the [`EventSink::with_stream_id`] prefix (so persisted +/// `event_id`s are the restart-stable `{run_id}-evt-{offset}`), the journal +/// stream key, and the status-store key. The `simple()` uuid form (no hyphens) +/// keeps the id inside the crate store's allowed-character set. +/// +/// The caller mints this *before* creating the run [`EventSink`] so the same id +/// seeds the sink stream prefix and the durable journal/status — see +/// [`attach_turn_journal`]. +/// +/// [`EventSink::with_stream_id`]: tinyagents::harness::events::EventSink::with_stream_id +pub(crate) fn mint_run_id() -> RunId { RunId::new(format!("run.{}", uuid::Uuid::new_v4().simple())) } @@ -268,13 +287,26 @@ impl TurnJournal { /// Attach a durable event journal + status writer to `events`, *in addition to* /// the existing (untouched) [`OpenhumanEventBridge`] subscription. /// +/// `run_id` MUST be the same id the caller passed to +/// [`EventSink::with_stream_id`] when it created `events` (mint it once via +/// [`mint_run_id`]). That shared id is what makes the persisted `event_id`s the +/// restart-stable `{run_id}-evt-{offset}` a late-attach replay reconstructs the +/// timeline from. `thread_id` (when known — e.g. the sub-agent task scope) +/// records the run under a thread so [`FileStatusStore::list_by_thread`] answers. +/// /// Returns a [`TurnJournal`] handle the caller uses to stamp the terminal /// status after the run, or `None` when the store could not be opened (the run /// proceeds unaffected — journaling is best-effort). Safe to call for observed /// and unobserved turns alike: it does not depend on `on_progress`. /// /// [`OpenhumanEventBridge`]: crate::openhuman::tinyagents::observability::OpenhumanEventBridge -pub(crate) async fn attach_turn_journal(events: &EventSink, model: &str) -> Option { +/// [`EventSink::with_stream_id`]: tinyagents::harness::events::EventSink::with_stream_id +pub(crate) async fn attach_turn_journal( + events: &EventSink, + model: &str, + run_id: RunId, + thread_id: Option, +) -> Option { let workspace = match resolve_workspace().await { Ok(dir) => dir, Err(err) => { @@ -284,11 +316,12 @@ pub(crate) async fn attach_turn_journal(events: &EventSink, model: &str) -> Opti }; let stores = open_session_stores(&workspace); - let run_id = new_run_id(); // Event journal: crate StoreEventJournal over the 04-sessions JsonlAppendStore // (stream key = run id). Wrapped in a JournalSink (stamps run lineage) and a - // RedactingSink (masks process credentials) before persisting. + // RedactingSink (masks process credentials) before persisting. Because + // `events` was seeded with `with_stream_id(run_id)`, every persisted + // observation's `event_id` is the stable `{run_id}-evt-{offset}`. let journal: Arc = Arc::new(StoreEventJournal::new(stores.journal)); let journal_sink = JournalSink::new(journal, run_id.clone()); let redacting = RedactingSink::new(Arc::new(journal_sink), openhuman_redaction_secrets()); @@ -299,9 +332,13 @@ pub(crate) async fn attach_turn_journal(events: &EventSink, model: &str) -> Opti let fanout = FanOutSink::new().with(Arc::new(redacting)); events.subscribe(Arc::new(fanout)); - // Status store: durable, Store-backed. Seed an initial `running` snapshot. + // Status store: durable, Store-backed. Seed an initial `running` snapshot, + // recording the thread (when known) so list_by_thread answers at run start. let status_store = Arc::new(FileStatusStore::new(stores.kv)); let mut status = HarnessRunStatus::new(run_id.clone(), ComponentId::new(model.to_string())); + if let Some(thread_id) = thread_id { + status = status.with_thread(thread_id); + } status.mark_running(HarnessPhase::Model); if let Err(err) = status_store.put_status(status.clone()).await { log::debug!( @@ -311,8 +348,9 @@ pub(crate) async fn attach_turn_journal(events: &EventSink, model: &str) -> Opti } log::debug!( - "[journal] attached durable event journal run_id={} model={model}", - run_id.as_str() + "[journal] attached durable event journal run_id={} thread={:?} model={model}", + run_id.as_str(), + status.thread_id.as_ref().map(|t| t.as_str()) ); Some(TurnJournal { run_id, @@ -372,12 +410,15 @@ mod tests { async fn journal_persists_and_replays_run() { let tmp = std::env::temp_dir().join(format!("oh-journal-test-{}", uuid::Uuid::new_v4())); let stores = open_session_stores(&tmp); - let run_id = new_run_id(); + let run_id = mint_run_id(); // Attach a journal sink directly (bypassing config resolution) and emit. + // Seed the sink with the run id so persisted `event_id`s are the + // restart-stable `{run_id}-evt-{offset}` — mirrors the caller in + // `run_turn_via_tinyagents_shared`. let journal: Arc = Arc::new(StoreEventJournal::new(stores.journal)); - let sink = EventSink::new(); + let sink = EventSink::with_stream_id(run_id.as_str()); let journal_sink = JournalSink::new(journal, run_id.clone()); let redacting = RedactingSink::new(Arc::new(journal_sink), vec!["sk-super-secret".into()]); sink.subscribe(Arc::new(FanOutSink::new().with(Arc::new(redacting)))); @@ -394,6 +435,16 @@ mod tests { // Reconstruct from the durable store alone. let replayed = read_run_events_at(&tmp, run_id.as_str(), 0).await; assert_eq!(replayed.len(), 2); + // Records come back fully ordered with restart-stable ids of the form + // `{run_id}-evt-{offset}`. + for (offset, obs) in replayed.iter().enumerate() { + assert_eq!(obs.offset, offset as u64, "offset should be monotonic"); + assert_eq!( + obs.event_id.as_str(), + format!("{}-evt-{offset}", run_id.as_str()), + "event id should be the stable {{stream_id}}-evt-{{offset}}" + ); + } // The seeded secret was masked before persistence. if let AgentEvent::ModelStarted { model, .. } = &replayed[0].event { assert!( @@ -405,15 +456,42 @@ mod tests { panic!("expected ModelStarted first"); } - // Status store round-trips a running → completed transition + list_by_root. + // Late attach at a non-zero offset: a reader that reconnects after the + // first event reconstructs only the tail (offset >= 1), still ordered and + // still with stable ids — the mid-run reconnect/backfill path. + let tail = read_run_events_at(&tmp, run_id.as_str(), 1).await; + assert_eq!(tail.len(), 1); + assert_eq!(tail[0].offset, 1); + assert_eq!( + tail[0].event_id.as_str(), + format!("{}-evt-1", run_id.as_str()) + ); + assert!(matches!(tail[0].event, AgentEvent::ToolStarted { .. })); + + // Status store round-trips a running → completed transition and answers + // list_active / list_by_root / list_by_thread. let status_store = FileStatusStore::new(open_session_stores(&tmp).kv); let mut status = - HarnessRunStatus::new(run_id.clone(), ComponentId::new("mock-model".to_string())); + HarnessRunStatus::new(run_id.clone(), ComponentId::new("mock-model".to_string())) + .with_thread(ThreadId::new("thread-42")); status.mark_running(HarnessPhase::Model); status_store.put_status(status.clone()).await.unwrap(); let active = status_store.list_active().await.unwrap(); assert_eq!(active.len(), 1); assert_eq!(active[0].status, ExecutionStatus::Running); + assert_eq!( + status_store + .list_by_thread("thread-42") + .await + .unwrap() + .len(), + 1 + ); + assert!(status_store + .list_by_thread("nope") + .await + .unwrap() + .is_empty()); status.mark_completed(); status_store.put_status(status).await.unwrap(); diff --git a/src/openhuman/tinyagents/middleware.rs b/src/openhuman/tinyagents/middleware.rs index b0a0537067..7ec1bd65e6 100644 --- a/src/openhuman/tinyagents/middleware.rs +++ b/src/openhuman/tinyagents/middleware.rs @@ -7,8 +7,6 @@ //! [`Middleware`] hooks restores the behaviour and makes the graph the single //! place cross-cutting context concerns live: //! -//! - [`CacheAlignMiddleware`] (`before_model`) — warn on volatile tokens in the -//! system prompt that would bust the provider KV-cache prefix. Warn-only. //! - [`MicrocompactMiddleware`] (`before_model`) — clear the bodies of older //! tool-result messages (keeping the N most recent) so a long tool-heavy //! thread stays cheap without dropping chat history. @@ -20,7 +18,7 @@ //! enabled onto a harness. use std::collections::HashMap; -use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use std::sync::Arc; use async_trait::async_trait; @@ -34,6 +32,7 @@ use tinyagents::harness::middleware::{ ToolAllowlistMiddleware, ToolHandler, ToolMiddleware, }; use tinyagents::harness::model::{ModelRequest, PromptSegment, SegmentRole}; +use tinyagents::harness::no_progress::{NoProgress, NoProgressTracker, ToolAttempt}; use tinyagents::harness::runtime::AgentHarness; use tinyagents::harness::steering::{SteeringCommand, SteeringHandle}; use tinyagents::harness::tool::{ @@ -74,8 +73,6 @@ pub(crate) struct TurnContextMiddleware { pub(crate) tokenjuice_compaction_enabled: bool, /// Agent-level TokenJuice profile for tool-result compaction. pub(crate) tokenjuice_compression: AgentTokenjuiceCompression, - /// Warn on volatile tokens in the system prompt (KV-cache diagnostic). - pub(crate) cache_align: bool, /// Keep-recent count for microcompact tool-body clearing. `0` disables it. pub(crate) microcompact_keep_recent: usize, /// Whether the LLM summarization step (`ContextCompressionMiddleware`) may be @@ -224,8 +221,8 @@ pub(crate) struct SuperContextConfig { impl TurnContextMiddleware { /// A sensible default for turn paths without a session `ContextManager` - /// (channel / sub-agent): cache-align warnings on and the default tool-result - /// byte cap, no summarizer or microcompact. + /// (channel / sub-agent): the default tool-result byte cap, no summarizer or + /// microcompact. pub(crate) fn defaults() -> Self { Self { tool_result_budget_bytes: DEFAULT_TOOL_RESULT_BUDGET_BYTES, @@ -233,7 +230,6 @@ impl TurnContextMiddleware { artifact_store: None, tokenjuice_compaction_enabled: false, tokenjuice_compression: AgentTokenjuiceCompression::Off, - cache_align: true, microcompact_keep_recent: 0, autocompact_enabled: true, super_context: None, @@ -246,7 +242,6 @@ impl TurnContextMiddleware { self.tool_result_budget_bytes == 0 && self.payload_summarizer.is_none() && !self.tokenjuice_compaction_enabled - && !self.cache_align && self.microcompact_keep_recent == 0 && self.super_context.is_none() && self.handoff.is_none() @@ -254,10 +249,10 @@ impl TurnContextMiddleware { /// Push the enabled middlewares onto `harness`. /// - /// `before_model` hooks run in registration order, so cache-align (warn) and - /// microcompact (clear tool bodies) are installed **before** the caller's - /// summarization / trim middlewares — microcompact frees cheap tokens first, - /// then summarization/trim handle the rest. + /// `before_model` hooks run in registration order, so microcompact (clear + /// tool bodies) is installed **before** the caller's summarization / trim + /// middlewares — microcompact frees cheap tokens first, then + /// summarization/trim handle the rest. pub(crate) fn install( self, harness: &mut AgentHarness<()>, @@ -272,9 +267,6 @@ impl TurnContextMiddleware { ran: AtomicBool::new(false), })); } - if self.cache_align { - harness.push_middleware(Arc::new(CacheAlignMiddleware)); - } if self.microcompact_keep_recent > 0 { harness.push_middleware(Arc::new(MicrocompactMiddleware { keep_recent: self.microcompact_keep_recent, @@ -551,163 +543,6 @@ fn parse_context_bundle_has_enough_context(bundle: &str) -> Option { } } -/// `before_model`: flag volatile tokens (UUIDs, timestamps, JWTs, …) in the -/// system prompt that silently break the provider KV-cache prefix. Warn-only — -/// never mutates the request. Replaces the deleted context cache-align reducer. -struct CacheAlignMiddleware; - -/// One detected volatile token in the cache-hot system prompt. -#[derive(Debug, Clone, PartialEq, Eq)] -struct VolatileFinding { - kind: &'static str, - sample: String, -} - -fn detect_volatile_prompt_tokens(system_prompt: &str) -> Vec { - let mut findings = Vec::new(); - for tok in system_prompt - .split(|c: char| !(c.is_ascii_alphanumeric() || matches!(c, '-' | '.' | ':' | '_'))) - { - if tok.len() < 8 { - continue; - } - if is_uuid(tok) { - findings.push(VolatileFinding { - kind: "uuid", - sample: redact_volatile_token(tok), - }); - } else if is_jwt(tok) { - findings.push(VolatileFinding { - kind: "jwt", - sample: redact_volatile_token(tok), - }); - } else if is_iso8601(tok) { - findings.push(VolatileFinding { - kind: "iso8601", - sample: redact_volatile_token(tok), - }); - } else if is_hex_hash(tok) { - findings.push(VolatileFinding { - kind: "hex_hash", - sample: redact_volatile_token(tok), - }); - } - } - findings -} - -fn warn_if_cache_prompt_volatile(system_prompt: &str) -> usize { - let findings = detect_volatile_prompt_tokens(system_prompt); - if !findings.is_empty() { - let mut kinds: Vec<&str> = findings.iter().map(|finding| finding.kind).collect(); - kinds.sort_unstable(); - kinds.dedup(); - let samples = findings - .iter() - .take(5) - .map(|finding| finding.sample.as_str()) - .collect::>() - .join(", "); - ::log::warn!( - "[tinyagents::cache-align] system prompt contains {} volatile token(s) ({}) samples={} -- KV-cache prefix may not hit; keep dynamic content out of the system prompt", - findings.len(), - kinds.join(", "), - samples, - ); - } - findings.len() -} - -fn redact_volatile_token(tok: &str) -> String { - let head: String = tok.chars().take(4).collect(); - format!("{head}...") -} - -fn is_uuid(tok: &str) -> bool { - if tok.len() != 36 { - return false; - } - let bytes = tok.as_bytes(); - for (i, b) in bytes.iter().enumerate() { - let expect_dash = matches!(i, 8 | 13 | 18 | 23); - if expect_dash { - if *b != b'-' { - return false; - } - } else if !b.is_ascii_hexdigit() { - return false; - } - } - true -} - -fn is_jwt(tok: &str) -> bool { - let segs: Vec<&str> = tok.split('.').collect(); - if segs.len() != 3 { - return false; - } - segs.iter().all(|segment| { - segment.len() >= 4 - && segment - .bytes() - .all(|b| b.is_ascii_alphanumeric() || b == b'-' || b == b'_') - }) && tok.starts_with("ey") -} - -fn is_hex_hash(tok: &str) -> bool { - matches!(tok.len(), 32 | 40 | 64) && tok.bytes().all(|b| b.is_ascii_hexdigit()) -} - -fn is_iso8601(tok: &str) -> bool { - let b = tok.as_bytes(); - if tok.len() < 19 { - return false; - } - let digit = |i: usize| b[i].is_ascii_digit(); - digit(0) - && digit(1) - && digit(2) - && digit(3) - && b[4] == b'-' - && digit(5) - && digit(6) - && b[7] == b'-' - && digit(8) - && digit(9) - && (b[10] == b'T' || b[10] == b' ') - && digit(11) - && digit(12) - && b[13] == b':' - && digit(14) - && digit(15) - && b[16] == b':' - && digit(17) - && digit(18) -} - -#[async_trait] -impl Middleware<()> for CacheAlignMiddleware { - fn name(&self) -> &str { - "cache_align" - } - - async fn before_model( - &self, - _ctx: &mut RunContext<()>, - _state: &(), - request: &mut ModelRequest, - ) -> TaResult<()> { - if let Some(sys) = request - .messages - .iter() - .find(|m| matches!(m, TaMessage::System(_))) - { - warn_if_cache_prompt_volatile(&sys.text()); - } - Ok(()) - } -} - /// Seed-free FNV-1a fingerprint (matches the crate's own prompt-layout hash /// approach) so a segment id is stable across process restarts — unlike Rust's /// randomly-seeded `SipHash`. Used to build content-fingerprinted prompt-cache @@ -734,11 +569,12 @@ fn stable_prefix_fingerprint(data: &str) -> String { /// have no prefix to protect. This stamps the segments with **content-fingerprint /// ids**: an unchanged system prompt + tool set yields a stable prefix, while an /// injected timestamp/uuid/etc. changes the fingerprint and the guard records a -/// [`CacheLayoutEvent`](tinyagents::harness::cache::CacheLayoutEvent). The -/// structured successor to [`CacheAlignMiddleware`]'s warn-only volatile-token -/// scan (kept installed in parallel until parity is shown; deletion is a gated -/// follow-up). Read-only w.r.t. the transcript — only sets `cache_segments` / -/// `prompt_fingerprint`. +/// [`CacheLayoutEvent`](tinyagents::harness::cache::CacheLayoutEvent). This is +/// the structured, crate-native replacement for the deleted warn-only +/// `CacheAlignMiddleware` volatile-token scan (C3): the crate +/// `PromptCacheGuardMiddleware` now owns KV-cache-prefix drift detection via +/// recorded `CacheLayoutEvent`s. Read-only w.r.t. the transcript — only sets +/// `cache_segments` / `prompt_fingerprint`. pub(crate) struct PromptCacheSegmentMiddleware; #[async_trait] @@ -1670,39 +1506,41 @@ impl Middleware<()> for CostBudgetMiddleware { } } -/// Consecutive **any**-failure no-progress backstop: different commands all -/// failing means the goal is unreachable here. Matches the legacy -/// `NO_PROGRESS_FAILURE_THRESHOLD`. -const NO_PROGRESS_FAILURE_THRESHOLD: usize = 6; -/// Consecutive **identical** hard-policy-rejection repeats before halting — a -/// blocked call re-issued unchanged can never succeed. Legacy -/// `HARD_REJECT_REPEAT_THRESHOLD`. -const HARD_REJECT_REPEAT_THRESHOLD: usize = 2; - -/// `after_tool`: stop the run when tool calls keep failing with no progress -/// (issue #4249). The legacy tool loop's progress guard surfaced a root-cause -/// halt summary — a security/approval denial re-issued unchanged, an identical -/// error retried, or *different* commands all failing — instead of burning the -/// whole iteration budget and ending on a generic cap error. The tinyagents path -/// kept only the model/tool call caps, so this reinstates the guard as a graph -/// middleware. Three halt conditions, checked per failure (any success resets -/// every counter — progress was made): +/// `after_tool`: stop (or nudge) the run when tool calls keep failing with no +/// progress (issue #4249). The legacy tool loop's progress guard surfaced a +/// root-cause halt summary — a security/approval denial re-issued unchanged, an +/// identical error retried, or *different* commands all failing — instead of +/// burning the whole iteration budget and ending on a generic cap error. The +/// tinyagents path kept only the model/tool call caps, so this reinstates the +/// guard as a graph middleware. /// -/// 1. **Hard policy rejection** (`[policy-blocked]`) repeated `HARD_REJECT_REPEAT_THRESHOLD` -/// times with an identical signature — "blocked by the security policy … re-issued". -/// 2. **Identical** error signature repeated `identical_threshold` times — -/// "retried N times with identical arguments". -/// 3. **Any** failure `NO_PROGRESS_FAILURE_THRESHOLD` times in a row (even with -/// varied errors) — "N tool calls in a row failed". +/// As of tinyagents 1.5.0 the escalation ladder itself lives in the crate +/// ([`NoProgressTracker`], extracted upstream from OpenHuman #4389). This +/// middleware is now a **thin driver**: it captures the per-call argument +/// fingerprint (the tool result carries no arguments), feeds each outcome into +/// [`NoProgressTracker::record`], and lowers the returned [`NoProgress`] verdict +/// into OpenHuman steering. It owns only the OpenHuman-side policy: /// -/// On trip it records a root-cause summary into the shared [`HaltSummarySlot`] -/// (the turn overrides its final text with it) and pauses the run via the shared -/// steering handle (same mechanism as the stop-hook / cap pausers). +/// - [`NoProgress::Continue`] — do nothing. +/// - [`NoProgress::Nudge`] — inject the crate's structured "no progress since +/// step X" corrective into the working transcript via +/// [`SteeringCommand::Redirect`] so the next model call sees it and changes +/// strategy *before* the same-strategy retry cap trips. +/// - [`NoProgress::Halt`] — record the crate's root-cause summary into the shared +/// [`HaltSummarySlot`](super::HaltSummarySlot) (the turn overrides its final +/// text with it) and pause the run via the shared steering handle (same +/// mechanism as the stop-hook / cap pausers), then [`reset`](NoProgressTracker::reset) +/// so a resumed run does not immediately re-pause on the latched state. pub(crate) struct RepeatedToolFailureMiddleware { handle: SteeringHandle, - identical_threshold: usize, halt_summary: super::HaltSummarySlot, - state: std::sync::Mutex, + /// Crate no-progress escalation ladder — the single source of the + /// identical-failure / varied-failure / hard-reject logic (tinyagents 1.5.0). + tracker: NoProgressTracker, + /// Monotonic tool-outcome counter, used only for the crate's "no progress + /// since step X" nudge wording. Not the model-call count, but a stable, + /// increasing marker is all the wording needs. + step: AtomicUsize, /// call_id → argument fingerprint, captured in `before_tool` (the tool result /// carries no arguments). Folded into the identical-repeat signature so the /// "identical arguments" halt only trips on the *same* args — two different @@ -1711,16 +1549,10 @@ pub(crate) struct RepeatedToolFailureMiddleware { arg_sigs: std::sync::Mutex>, } -#[derive(Default)] -struct FailureState { - last_sig: Option, - same_count: usize, - consecutive: usize, -} - impl RepeatedToolFailureMiddleware { /// Build the breaker. `identical_threshold` (the identical-signature retry - /// ceiling) is clamped to at least 2 — a single failure is never a loop. + /// ceiling) is handed straight to [`NoProgressTracker::new`], which clamps it + /// so a nudge always precedes a halt (a single failure is never a loop). pub(crate) fn new( handle: SteeringHandle, identical_threshold: usize, @@ -1728,9 +1560,9 @@ impl RepeatedToolFailureMiddleware { ) -> Self { Self { handle, - identical_threshold: identical_threshold.max(2), halt_summary, - state: std::sync::Mutex::new(FailureState::default()), + tracker: NoProgressTracker::new(identical_threshold), + step: AtomicUsize::new(0), arg_sigs: std::sync::Mutex::new(std::collections::HashMap::new()), } } @@ -1745,13 +1577,6 @@ fn args_fingerprint(arguments: &serde_json::Value) -> String { format!("{:x}", hasher.finish()) } -/// Trim a tool error for inclusion in a halt summary (keep it bounded but retain -/// the deterministic leading detail the model/user needs). -fn truncate_for_halt(text: &str) -> String { - const MAX: usize = 600; - crate::openhuman::util::truncate_with_ellipsis(text, MAX) -} - #[async_trait] impl Middleware<()> for RepeatedToolFailureMiddleware { fn name(&self) -> &str { @@ -1778,90 +1603,65 @@ impl Middleware<()> for RepeatedToolFailureMiddleware { _state: &(), result: &mut TaToolResult, ) -> TaResult<()> { - let mut state = self.state.lock().unwrap(); let arg_fp = self .arg_sigs .lock() .ok() .and_then(|mut sigs| sigs.remove(&result.call_id)) .unwrap_or_default(); - let Some(err) = result.error.as_deref() else { - // Success → progress was made; reset every counter. - *state = FailureState::default(); - return Ok(()); - }; - - // Signature: tool name + argument fingerprint + first error line (the - // deterministic parts; a huge payload tail must not dominate the - // identical-repeat comparison). Including the args means the "identical - // arguments" halt only fires when the args truly repeat. - let err_line = err.lines().next().unwrap_or(err); - let sig = format!("{}\u{1f}{arg_fp}\u{1f}{err_line}", result.name); - state.consecutive += 1; - let same_count = match &state.last_sig { - Some(prev) if *prev == sig => { - state.same_count += 1; - state.same_count - } - _ => { - state.last_sig = Some(sig); - state.same_count = 1; - 1 - } - }; + let step = self.step.fetch_add(1, Ordering::SeqCst) + 1; // A hard policy rejection is marked in the tool output; it can never - // succeed when re-issued unchanged, so it trips faster. - let is_hard_reject = result + // succeed when re-issued unchanged, so the crate ladder trips it faster. + let hard_reject = result .content .contains(crate::openhuman::security::POLICY_BLOCKED_MARKER) - || err.contains(crate::openhuman::security::POLICY_BLOCKED_MARKER); - - let summary = if is_hard_reject && same_count >= HARD_REJECT_REPEAT_THRESHOLD { - Some(format!( - "Stopping: the `{}` call is blocked by the security policy and was re-issued with \ - identical arguments — it can never succeed this way. Reason:\n{}\n\nDo not repeat \ - this call; use an allowed alternative or report that it can't be done here.", - result.name, - truncate_for_halt(err), - )) - } else if same_count >= self.identical_threshold { - Some(format!( - "Stopping: the `{}` call was retried {same_count} times with identical arguments \ - and kept failing — repeating it will not help. Last error:\n{}\n\nThis looks \ - unrecoverable in the current environment. Report this back instead of retrying.", - result.name, - truncate_for_halt(err), - )) - } else if state.consecutive >= NO_PROGRESS_FAILURE_THRESHOLD { - Some(format!( - "Stopping: {} tool calls in a row failed with no progress. Last error (from \ - `{}`):\n{}\n\nDifferent commands are all failing — the goal looks unreachable in \ - this environment. Report this back instead of retrying.", - state.consecutive, - result.name, - truncate_for_halt(err), - )) - } else { - None + || result + .error + .as_deref() + .is_some_and(|err| err.contains(crate::openhuman::security::POLICY_BLOCKED_MARKER)); + + let attempt = ToolAttempt { + tool: &result.name, + arg_fingerprint: &arg_fp, + error: result.error.as_deref(), + hard_reject, + // The unknown-tool recovery sentinel is a C3 concern; today every + // failure feeds the generic backstop exactly as the legacy ladder did. + recoverable_miss: false, }; - if let Some(summary) = summary { - tracing::warn!( - tool = %result.name, - consecutive = state.consecutive, - same_count, - is_hard_reject, - "[tinyagents::mw] repeated tool failure — halting run so the root cause surfaces" - ); - if let Ok(mut slot) = self.halt_summary.lock() { - *slot = Some(summary); + match self.tracker.record(step, &attempt) { + NoProgress::Continue => {} + NoProgress::Nudge(instruction) => { + tracing::warn!( + tool = %result.name, + step, + hard_reject, + "[tinyagents::mw] no-progress nudge — steering the model to change strategy before the retry cap" + ); + // Inject the crate's structured corrective into the working + // transcript (advisory system text; bypasses no security gate). + self.handle.send(SteeringCommand::Redirect { instruction }); + } + NoProgress::Halt(summary) => { + tracing::warn!( + tool = %result.name, + step, + hard_reject, + "[tinyagents::mw] repeated tool failure — halting run so the root cause surfaces" + ); + if let Ok(mut slot) = self.halt_summary.lock() { + *slot = Some(summary); + } + // Pause at the top of the next iteration (before the next model + // call), matching the stop-hook / cap pause path. Reset so a + // resumed run does not immediately re-pause on the latched state + // (the crate also resets internally on a halt; this is explicit + // and idempotent). + self.handle.send(SteeringCommand::Pause); + self.tracker.reset(); } - // Pause at the top of the next iteration (before the next model call), - // matching the stop-hook / cap pause path. Reset so a resumed run does - // not immediately re-pause on the same latched state. - self.handle.send(SteeringCommand::Pause); - *state = FailureState::default(); } Ok(()) } @@ -1926,9 +1726,8 @@ mod tests { // ── TurnContextMiddleware config ──────────────────────────────────────── #[test] - fn defaults_enable_cache_align_and_the_byte_cap_only() { + fn defaults_enable_the_byte_cap_only() { let mw = TurnContextMiddleware::defaults(); - assert!(mw.cache_align); assert_eq!( mw.tool_result_budget_bytes, DEFAULT_TOOL_RESULT_BUDGET_BYTES @@ -1938,6 +1737,8 @@ mod tests { // Autocompaction defaults on (channel/sub-agent); the chat path overrides // it from config. assert!(mw.autocompact_enabled); + // The byte cap alone is enough to make the bundle non-empty (CacheAlign + // was deleted in C3, so it no longer contributes here). assert!(!mw.is_empty()); } @@ -2201,6 +2002,18 @@ mod tests { r } + /// Count how many of the steering commands drained from `handle` are + /// `Pause` (the halt signal). The tracker-driven breaker now also emits a + /// `Redirect` **nudge** below the retry cap, so a raw `pending()` count no + /// longer isolates the halt — the tests classify by command kind instead. + fn drain_pause_count(handle: &SteeringHandle) -> usize { + handle + .drain() + .into_iter() + .filter(|c| matches!(c, SteeringCommand::Pause)) + .count() + } + #[tokio::test] async fn repeated_tool_failure_pauses_only_after_the_threshold() { let handle = SteeringHandle::allow_all(); @@ -2209,18 +2022,24 @@ mod tests { 3, std::sync::Arc::new(std::sync::Mutex::new(None)), ); - // Two identical failures: below the threshold, no pause. + // Two identical failures: below the halt threshold. The crate ladder + // nudges (Redirect) on the second, but must NOT pause (halt) yet. for _ in 0..2 { let mut r = failing_result("flaky", "boom"); mw.after_tool(&mut ctx(), &(), &mut r).await.unwrap(); } - assert_eq!(handle.pending(), 0, "no pause before the threshold"); - // Third identical failure trips the breaker. + assert_eq!( + drain_pause_count(&handle), + 0, + "no halt before the threshold" + ); + // Third identical failure exhausts the same-strategy retries → halt. let mut r = failing_result("flaky", "boom"); mw.after_tool(&mut ctx(), &(), &mut r).await.unwrap(); - assert!( - handle.pending() >= 1, - "the third identical failure should pause the run" + assert_eq!( + drain_pause_count(&handle), + 1, + "the third identical failure should pause (halt) the run" ); } @@ -2239,12 +2058,17 @@ mod tests { } let mut ok = tool_result("t", "fine"); // error = None mw.after_tool(&mut ctx(), &(), &mut ok).await.unwrap(); - // Two more failures — still below the threshold because the counter reset. + // Two more failures — still below the halt threshold because the counter + // reset, so the ladder never reaches the third identical repeat. for _ in 0..2 { let mut r = failing_result("t", "boom"); mw.after_tool(&mut ctx(), &(), &mut r).await.unwrap(); } - assert_eq!(handle.pending(), 0, "a success should reset the breaker"); + assert_eq!( + drain_pause_count(&handle), + 0, + "a success should reset the breaker so it never halts" + ); } #[tokio::test] @@ -2256,7 +2080,8 @@ mod tests { std::sync::Arc::new(std::sync::Mutex::new(None)), ); // Three *different* errors never trip the breaker — only an identical, - // deterministic failure loop does. + // deterministic failure loop does (and the varied-failure backstop nudges + // at 4 / halts at 6, both above this count). for err in ["e1", "e2", "e3"] { let mut r = failing_result("t", err); mw.after_tool(&mut ctx(), &(), &mut r).await.unwrap(); @@ -2264,7 +2089,7 @@ mod tests { assert_eq!( handle.pending(), 0, - "distinct errors must not trip the breaker" + "distinct errors below the backstop must not steer the run" ); } diff --git a/src/openhuman/tinyagents/mod.rs b/src/openhuman/tinyagents/mod.rs index 474bf8d697..4d79cdd925 100644 --- a/src/openhuman/tinyagents/mod.rs +++ b/src/openhuman/tinyagents/mod.rs @@ -156,6 +156,20 @@ fn run_policy_for(max_iterations: usize, response_cache_enabled: bool) -> RunPol policy.limits.max_tool_calls = max_iterations.saturating_mul(8).max(8); policy.limits.max_depth = MAX_SPAWN_DEPTH; policy.retry.max_attempts = 1; + // Unknown-tool recovery (01.2 / C3): the crate policy owns this end to end — + // the `__openhuman_unknown_tool__` sentinel tool + `UnknownToolRewriteMiddleware` + // were already deleted. We deliberately keep `ReturnToolError` rather than + // `Rewrite { tool_name }`: Rewrite requires a real catch-all target tool (the + // deleted sentinel was exactly that) and, when it hits, *silently* executes + // that tool and emits `AgentEvent::UnknownToolCall { recovery: "rewrite:.." }` + // WITHOUT injecting a tool message. `ReturnToolError` instead injects a + // recoverable `unknown tool `` (arguments: ..); valid tools: [..]` + // result naming the originally-requested tool. Two live consumers depend on + // that message: (1) the #4419 attempted-tool-name UX and (2) the failure + // classifier in `agent::hooks::sanitize_tool_output`, which labels the result + // `unknown_tool` by matching the "unknown tool" substring. Flipping to Rewrite + // would drop both. The original name + args are also preserved verbatim on + // `AgentEvent::UnknownToolCall` and projected by `OpenhumanEventBridge`. policy.unknown_tool = UnknownToolPolicy::ReturnToolError; // Prompt-prefix protection is always on (issue #4249, 03.2): the // `PromptCacheGuardMiddleware` records a `CacheLayoutEvent` whenever volatile @@ -512,9 +526,32 @@ pub(crate) async fn run_turn_via_tinyagents_shared( ); ctx = ctx.with_workspace(descriptor); } + // Assemble the run's store registry: the tool-result artifact index (when + // present) and — behind the default-ON session dual-write flag — the + // session KV store, so the harness carries a handle to the same + // `{workspace}/tinyagents_store/kv` tree the live dual-write mirrors into + // (issue #4249, 04.1). Both stores share one registry so neither clobbers + // the other. Reads stay legacy until 04.2; this registration is additive + // and best-effort (a workspace-resolve failure just skips it). + let mut stores: Option = None; if let Some(index) = tool_result_artifact_index { - let mut stores = StoreRegistry::new(); - stores.register(TINYAGENTS_TOOL_RESULT_ARTIFACT_STORE, index); + stores + .get_or_insert_with(StoreRegistry::new) + .register(TINYAGENTS_TOOL_RESULT_ARTIFACT_STORE, index); + } + // `session_kv_store` self-gates on the dual-write flag (config default ON + + // env kill switch), returning `None` when disabled or unresolvable. + if let Some(session_kv) = crate::openhuman::session_import::live::session_kv_store().await { + stores.get_or_insert_with(StoreRegistry::new).register( + crate::openhuman::session_import::live::TINYAGENTS_SESSION_KV_STORE, + session_kv, + ); + tracing::debug!( + "[session-store] registered session kv store on RunContext.stores under '{}'", + crate::openhuman::session_import::live::TINYAGENTS_SESSION_KV_STORE + ); + } + if let Some(stores) = stores { ctx = ctx.with_stores(stores); } @@ -533,7 +570,13 @@ pub(crate) async fn run_turn_via_tinyagents_shared( // (`on_progress = None`) turn so the run stays reconstructable, so the // EventSink is now created unconditionally — cheap (an empty sink) and, if // no consumer subscribes, inert. - let events = Some(EventSink::new()); + // + // Mint the durable run id *before* the sink and seed the sink stream prefix + // with it (`with_stream_id`), so every persisted observation's `event_id` is + // the restart-stable `{run_id}-evt-{offset}` a late-attach replay + // reconstructs the timeline from (05.1). The same id keys the journal + status. + let journal_run_id = journal::mint_run_id(); + let events = Some(EventSink::with_stream_id(journal_run_id.as_str())); let bridge = match (&events, on_progress) { (Some(events), Some(tx)) => { @@ -565,8 +608,17 @@ pub(crate) async fn run_turn_via_tinyagents_shared( // existing progress/global-bus path is untouched. Best-effort and non-fatal // — a failure to open/attach the journal returns `None` and the turn runs // unaffected. The handle stamps the terminal status once the run returns. + // A sub-agent turn records under its task scope as the status thread id, so + // `list_by_thread` can enumerate a task's runs (full parent/root lineage is + // a 05.2/05.3 follow-up). + let journal_thread_id = subagent_scope + .as_ref() + .map(|scope| tinyagents::harness::ids::ThreadId::new(scope.task_id.clone())); let turn_journal = match &events { - Some(events) => journal::attach_turn_journal(events, model).await, + Some(events) => { + journal::attach_turn_journal(events, model, journal_run_id.clone(), journal_thread_id) + .await + } None => None, }; @@ -712,9 +764,9 @@ pub(crate) async fn run_turn_via_tinyagents_shared( // `PromptCacheGuardMiddleware`'s recorded `CacheLayoutEvent`s and surface each // as a structured `[cache]` warning. Fires only when the cacheable prompt // prefix (system prompt + tool set) changed across model calls — i.e. volatile - // content silently busting the provider KV-cache prefix. The structured - // successor to `CacheAlignMiddleware`'s free-text warn-log (still installed in - // parallel until parity is shown). + // content silently busting the provider KV-cache prefix. This is now the sole + // owner of KV-cache-prefix drift detection: the warn-only + // `CacheAlignMiddleware` was deleted in C3. let cache_layout_events = prompt_cache_guard.layout_events(); if !cache_layout_events.is_empty() { tracing::debug!( @@ -896,8 +948,8 @@ struct AssembledTurnHarness { /// Crate prompt-cache guard (issue #4249, 03.2). Records a `CacheLayoutEvent` /// whenever the cacheable prompt prefix (system prompt + tool set) changes /// across model calls. Drained after the run and surfaced via - /// [`observability::surface_cache_layout_events`] — the structured successor to - /// the `CacheAlignMiddleware` warn-log. + /// [`observability::surface_cache_layout_events`] — the crate-native + /// replacement for the deleted `CacheAlignMiddleware` warn-log (C3). prompt_cache_guard: Arc, } @@ -1270,18 +1322,19 @@ fn assemble_turn_harness( // precede the guard; both run before the context middlewares below (they only // touch the volatile tail / tool bodies, never the stable prefix). The guard is // returned so the run loop can drain its events into the observability bridge — - // the structured successor to `CacheAlignMiddleware`'s warn-log (kept installed - // via `context_mw` until parity is shown). + // the crate-native replacement for the deleted `CacheAlignMiddleware` warn-log + // (C3: the warn-only shadow is gone; this guard is the sole owner). harness.push_middleware(Arc::new(middleware::PromptCacheSegmentMiddleware)); let prompt_cache_guard = Arc::new(PromptCacheGuardMiddleware::new()); harness.push_middleware(prompt_cache_guard.clone()); - // openhuman context concerns as graph middlewares (issue #4249): cache-align - // warnings, microcompact tool-body clearing, and the after-tool byte cap / - // payload summarizer. Installed before the summarization/trim block below so - // `before_model` hooks run cache-align → microcompact → compress → trim. - // Tool-result caps read the SDK registry policy snapshot, not the - // OpenHuman-side tool lookup. + // openhuman context concerns as graph middlewares (issue #4249): microcompact + // tool-body clearing and the after-tool byte cap / payload summarizer. + // Installed before the summarization/trim block below so `before_model` hooks + // run microcompact → compress → trim. (KV-cache-prefix drift is handled above + // by the crate `PromptCacheGuardMiddleware`; the warn-only CacheAlign shadow + // was deleted in C3.) Tool-result caps read the SDK registry policy snapshot, + // not the OpenHuman-side tool lookup. let tool_policies = harness.tools().policies(); context_mw.install(&mut harness, tool_policies); diff --git a/src/openhuman/tinyagents/observability.rs b/src/openhuman/tinyagents/observability.rs index 20651274d1..cb55908316 100644 --- a/src/openhuman/tinyagents/observability.rs +++ b/src/openhuman/tinyagents/observability.rs @@ -614,12 +614,12 @@ impl EventListener for OpenhumanEventBridge { /// /// The guard records a layout event whenever the cacheable prompt prefix changes /// between turns (volatile content — a timestamp, uuid, injected memory, etc. — -/// silently busting the provider KV-cache prefix). This is the structured -/// successor to `CacheAlignMiddleware`'s free-text warn-log: instead of a -/// token-pattern heuristic it reports the exact before/after cacheable segment -/// ids. Drained by the turn loop after the run and logged here; `CacheAlign` is -/// kept installed in parallel until parity is shown (its deletion is a gated -/// follow-up). +/// silently busting the provider KV-cache prefix). This is the crate-native +/// replacement for the deleted `CacheAlignMiddleware` free-text warn-log: +/// instead of a token-pattern heuristic it reports the exact before/after +/// cacheable segment ids. Drained by the turn loop after the run and logged +/// here. The warn-only `CacheAlignMiddleware` shadow was deleted in C3; this +/// guard is now the sole owner of KV-cache-prefix drift detection. pub(crate) fn surface_cache_layout_events(model: &str, events: &[CacheLayoutEvent]) { for event in events { tracing::warn!( diff --git a/src/openhuman/todos/graph_shadow.rs b/src/openhuman/todos/graph_shadow.rs new file mode 100644 index 0000000000..f74be76068 --- /dev/null +++ b/src/openhuman/todos/graph_shadow.rs @@ -0,0 +1,425 @@ +//! Shadow adapter: mirror the OpenHuman task board into the vendored +//! `tinyagents::graph::todos` crate `TaskBoard` (crate `Store` namespace +//! `graph.todos`). +//! +//! ADAPTER-FIRST / SHADOW ONLY — nothing in this module changes product +//! behavior. The legacy [`TaskBoardStore`](crate::openhuman::agent::task_board) +//! + [`todos::ops`](crate::openhuman::todos::ops) remain the single source of +//! truth. This module (C2b first slice) mirrors post-mutation card snapshots +//! into a crate `Store` and shadow-runs the crate `claim_card` CAS purely to +//! prove parity ahead of the C2 cutover, logging any divergence. All work is +//! best-effort and fire-and-forget: a mirror/claim failure is logged and +//! swallowed, never surfaced to a caller. +//! +//! # Status mapping +//! The OpenHuman and crate `TaskCardStatus` enums are 1:1 +//! (`Todo`/`AwaitingApproval`/`Ready`/`InProgress`/`Blocked`/`Done`/`Rejected`), +//! so [`map_status_to_crate`] is total and lossless. +//! +//! # Known OpenHuman ↔ crate semantic divergences (logged, not reconciled) +//! - **Scratch boards.** OpenHuman has an in-memory, thread-less +//! [`BoardLocation::Scratch`](crate::openhuman::todos::ops::BoardLocation) +//! fallback (tool calls outside a chat thread). The crate task board is +//! always `(Store, thread_id)`, so scratch mutations have no mirror target +//! and are skipped (trace-logged). +//! - **Card id minting.** OpenHuman `normalise_board` mints missing ids as +//! `task-`; the crate mints `task-`. We pass ids through unchanged +//! so an already-persisted board round-trips, but a brand-new blank id would +//! diverge — logged if observed. +//! - **Timestamps.** OpenHuman stores `updated_at` as RFC3339; the crate stores +//! unix-epoch millis. Cosmetic; the mirror does not attempt to reconcile. +//! - **Single writer.** The crate `Store` has no compare-and-set, so both the +//! mirror and the shadow-claim assume the core process is the only writer of +//! ns `graph.todos` (it is). Concurrent shadow tasks converge to the latest +//! legacy state; only the log-line ordering is nondeterministic. + +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +use tinyagents::graph::todos::store as crate_todos; +use tinyagents::graph::todos::{ + TaskApprovalMode as CrateApprovalMode, TaskBoardCard as CrateCard, + TaskCardStatus as CrateStatus, +}; +use tinyagents::harness::store::{FileStore, Store}; + +use crate::openhuman::agent::task_board::{ + TaskApprovalMode as OhApprovalMode, TaskBoardCard as OhCard, TaskCardStatus as OhStatus, +}; +use crate::openhuman::todos::ops::BoardLocation; + +/// Sub-directory of the workspace holding the crate `FileStore` that backs the +/// shadow `graph.todos` namespace. Kept separate from the authoritative +/// `agent_task_boards/` JSON so the shadow never collides with product state. +const SHADOW_STORE_DIR: &str = "tinyagents_graph_store"; + +/// Maps an OpenHuman [`OhStatus`] to the crate [`CrateStatus`]. Total (the two +/// enums share the same seven variants). +pub(crate) fn map_status_to_crate(status: &OhStatus) -> CrateStatus { + match status { + OhStatus::Todo => CrateStatus::Todo, + OhStatus::AwaitingApproval => CrateStatus::AwaitingApproval, + OhStatus::Ready => CrateStatus::Ready, + OhStatus::InProgress => CrateStatus::InProgress, + OhStatus::Blocked => CrateStatus::Blocked, + OhStatus::Done => CrateStatus::Done, + OhStatus::Rejected => CrateStatus::Rejected, + } +} + +/// Maps a crate [`CrateStatus`] back to an OpenHuman [`OhStatus`]. Total; used +/// only to compare a shadow-claim result against the legacy outcome. +pub(crate) fn map_status_from_crate(status: CrateStatus) -> OhStatus { + match status { + CrateStatus::Todo => OhStatus::Todo, + CrateStatus::AwaitingApproval => OhStatus::AwaitingApproval, + CrateStatus::Ready => OhStatus::Ready, + CrateStatus::InProgress => OhStatus::InProgress, + CrateStatus::Blocked => OhStatus::Blocked, + CrateStatus::Done => OhStatus::Done, + CrateStatus::Rejected => OhStatus::Rejected, + } +} + +fn map_approval_mode(mode: &OhApprovalMode) -> CrateApprovalMode { + match mode { + OhApprovalMode::Required => CrateApprovalMode::Required, + OhApprovalMode::NotRequired => CrateApprovalMode::NotRequired, + } +} + +/// Converts an OpenHuman [`OhCard`] into the crate [`CrateCard`], preserving the +/// id, status, and all optional metadata so a persisted board round-trips. +pub(crate) fn to_crate_card(card: &OhCard) -> CrateCard { + CrateCard { + id: card.id.clone(), + title: card.title.clone(), + status: map_status_to_crate(&card.status), + objective: card.objective.clone(), + plan: card.plan.clone(), + assigned_agent: card.assigned_agent.clone(), + allowed_tools: card.allowed_tools.clone(), + approval_mode: card.approval_mode.as_ref().map(map_approval_mode), + acceptance_criteria: card.acceptance_criteria.clone(), + evidence: card.evidence.clone(), + notes: card.notes.clone(), + blocker: card.blocker.clone(), + session_thread_id: card.session_thread_id.clone(), + source_metadata: card.source_metadata.clone(), + order: card.order, + updated_at: card.updated_at.clone(), + } +} + +/// Builds the crate `Store` rooted at `/tinyagents_graph_store`. +pub(crate) fn crate_store_for(workspace_dir: &Path) -> Arc { + Arc::new(FileStore::new(workspace_dir.join(SHADOW_STORE_DIR))) +} + +/// Returns `(workspace_dir, thread_id)` for a mirrorable `Thread` board, or +/// `None` for the thread-less `Scratch` board (which has no crate target). +fn thread_target(location: &BoardLocation) -> Option<(PathBuf, String)> { + match location { + BoardLocation::Thread { + workspace_dir, + thread_id, + } => Some((workspace_dir.clone(), thread_id.clone())), + BoardLocation::Scratch => None, + } +} + +/// Fire-and-forget: mirror the post-mutation `cards` for `location` into the +/// crate `graph.todos` store. No-op for scratch boards or when no tokio runtime +/// is available (e.g. a sync unit test). Never affects the caller. +pub(crate) fn spawn_mirror(location: &BoardLocation, cards: &[OhCard]) { + let Some((workspace_dir, thread_id)) = thread_target(location) else { + tracing::trace!("[todos][graph-shadow] mirror skipped: scratch board has no crate target"); + return; + }; + let crate_cards: Vec = cards.iter().map(to_crate_card).collect(); + let in_progress = crate_cards + .iter() + .filter(|c| matches!(c.status, CrateStatus::InProgress)) + .count(); + spawn_best_effort(async move { + let store = crate_store_for(&workspace_dir); + match crate_todos::replace(&store, &thread_id, crate_cards).await { + Ok(snap) => { + tracing::debug!( + thread_id = %thread_id, + card_count = snap.cards.len(), + "[todos][graph-shadow] mirror ok" + ); + } + Err(e) => { + // The most likely divergence is the single-InProgress invariant: + // the crate rejects >1 in-progress. Product enforces the same + // rule before save, so a rejection here flags a real mismatch. + tracing::warn!( + thread_id = %thread_id, + in_progress, + error = %e, + "[todos][graph-shadow] mirror DIVERGENCE (crate replace rejected)" + ); + } + } + }); +} + +/// Fire-and-forget shadow of a `claim_card` CAS. Mirrors `pre_cards` (the board +/// as loaded, before the legacy claim mutated it) into the crate store, replays +/// the crate `claim_card`, and logs whether the crate outcome agrees with the +/// authoritative `legacy_ok`. Log-only: the legacy claim stays authoritative. +pub(crate) fn spawn_shadow_claim( + location: &BoardLocation, + pre_cards: Vec, + card_id: &str, + expected: Vec, + target: OhStatus, + legacy_ok: bool, +) { + let Some((workspace_dir, thread_id)) = thread_target(location) else { + tracing::trace!( + "[todos][graph-shadow] shadow-claim skipped: scratch board has no crate target" + ); + return; + }; + let card_id = card_id.to_string(); + let crate_cards: Vec = pre_cards.iter().map(to_crate_card).collect(); + let crate_expected: Vec = expected.iter().map(map_status_to_crate).collect(); + let crate_target = map_status_to_crate(&target); + spawn_best_effort(async move { + let store = crate_store_for(&workspace_dir); + // Seed the crate board with the pre-claim snapshot so the CAS runs + // against the same state the legacy claim saw (deterministic regardless + // of any concurrent mirror task). + if let Err(e) = crate_todos::replace(&store, &thread_id, crate_cards).await { + tracing::debug!( + thread_id = %thread_id, + card_id = %card_id, + error = %e, + "[todos][graph-shadow] shadow-claim seed replace failed; skipping compare" + ); + return; + } + let crate_result = + crate_todos::claim_card(&store, &thread_id, &card_id, &crate_expected, crate_target) + .await; + let crate_ok = crate_result.is_ok(); + if crate_ok == legacy_ok { + tracing::debug!( + thread_id = %thread_id, + card_id = %card_id, + outcome_ok = legacy_ok, + "[todos][graph-shadow] shadow-claim parity" + ); + } else { + tracing::warn!( + thread_id = %thread_id, + card_id = %card_id, + legacy_ok, + crate_ok, + crate_err = crate_result.as_ref().err().map(|e| e.to_string()), + "[todos][graph-shadow] shadow-claim DIVERGENCE (legacy vs crate CAS disagree)" + ); + } + }); +} + +/// Spawns `fut` onto the current tokio runtime if one exists; otherwise +/// trace-logs and drops it. Keeps the shadow entirely off the caller's path. +fn spawn_best_effort(fut: F) +where + F: std::future::Future + Send + 'static, +{ + match tokio::runtime::Handle::try_current() { + Ok(handle) => { + handle.spawn(fut); + } + Err(_) => { + tracing::trace!( + "[todos][graph-shadow] no tokio runtime; shadow task skipped (sync context)" + ); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn oh_card(id: &str, status: OhStatus) -> OhCard { + OhCard { + id: id.to_string(), + title: format!("card {id}"), + status, + objective: Some("obj".to_string()), + plan: vec!["step-1".to_string()], + assigned_agent: Some("planner".to_string()), + allowed_tools: vec!["todo".to_string()], + approval_mode: Some(OhApprovalMode::Required), + acceptance_criteria: vec!["tests pass".to_string()], + evidence: vec!["cargo test".to_string()], + notes: Some("note".to_string()), + blocker: None, + session_thread_id: Some("thread-x".to_string()), + source_metadata: Some(serde_json::json!({ "urgency": 0.5 })), + order: 3, + updated_at: "2026-07-03T00:00:00Z".to_string(), + } + } + + #[test] + fn status_mapping_is_total_and_round_trips() { + let all = [ + OhStatus::Todo, + OhStatus::AwaitingApproval, + OhStatus::Ready, + OhStatus::InProgress, + OhStatus::Blocked, + OhStatus::Done, + OhStatus::Rejected, + ]; + for oh in all { + let crate_status = map_status_to_crate(&oh); + // The stable string label must survive the mapping unchanged. + assert_eq!(oh.as_str(), crate_status.as_str()); + // And the mapping must round-trip losslessly. + assert_eq!(map_status_from_crate(crate_status), oh); + } + } + + #[test] + fn card_conversion_preserves_all_fields() { + let oh = oh_card("task-1", OhStatus::InProgress); + let c = to_crate_card(&oh); + assert_eq!(c.id, "task-1"); + assert_eq!(c.title, "card task-1"); + assert_eq!(c.status, CrateStatus::InProgress); + assert_eq!(c.objective.as_deref(), Some("obj")); + assert_eq!(c.plan, vec!["step-1".to_string()]); + assert_eq!(c.assigned_agent.as_deref(), Some("planner")); + assert_eq!(c.allowed_tools, vec!["todo".to_string()]); + assert_eq!(c.approval_mode, Some(CrateApprovalMode::Required)); + assert_eq!(c.acceptance_criteria, vec!["tests pass".to_string()]); + assert_eq!(c.evidence, vec!["cargo test".to_string()]); + assert_eq!(c.notes.as_deref(), Some("note")); + assert_eq!(c.session_thread_id.as_deref(), Some("thread-x")); + assert_eq!( + c.source_metadata, + Some(serde_json::json!({ "urgency": 0.5 })) + ); + assert_eq!(c.order, 3); + assert_eq!(c.updated_at, "2026-07-03T00:00:00Z"); + } + + #[test] + fn approval_mode_maps_both_variants() { + assert_eq!( + map_approval_mode(&OhApprovalMode::Required), + CrateApprovalMode::Required + ); + assert_eq!( + map_approval_mode(&OhApprovalMode::NotRequired), + CrateApprovalMode::NotRequired + ); + } + + #[test] + fn scratch_board_has_no_crate_target() { + assert!(thread_target(&BoardLocation::Scratch).is_none()); + let loc = BoardLocation::Thread { + workspace_dir: PathBuf::from("/tmp/ws"), + thread_id: "user-tasks".to_string(), + }; + let (ws, tid) = thread_target(&loc).expect("thread target"); + assert_eq!(ws, PathBuf::from("/tmp/ws")); + assert_eq!(tid, "user-tasks"); + } + + /// The crate `store::replace` mirror path applied end-to-end: mapping a + /// legacy board of OpenHuman cards into the crate store yields a crate board + /// whose statuses and ids match, proving the mirror adapter round-trips. + #[tokio::test] + async fn mirror_round_trips_through_crate_store() { + let dir = tempfile::tempdir().expect("tempdir"); + let store = crate_store_for(dir.path()); + let cards = vec![ + oh_card("task-a", OhStatus::Todo), + oh_card("task-b", OhStatus::InProgress), + oh_card("task-c", OhStatus::Blocked), + ]; + let crate_cards: Vec = cards.iter().map(to_crate_card).collect(); + let snap = crate_todos::replace(&store, "user-tasks", crate_cards) + .await + .expect("replace ok"); + assert_eq!(snap.cards.len(), 3); + assert_eq!(snap.cards[0].id, "task-a"); + assert_eq!(snap.cards[1].status, CrateStatus::InProgress); + assert_eq!(snap.cards[2].status, CrateStatus::Blocked); + + // A re-read via the crate list op returns the same board. + let listed = crate_todos::list(&store, "user-tasks") + .await + .expect("list ok"); + assert_eq!(listed.cards.len(), 3); + } + + /// The single-InProgress invariant is shared: a legacy board that already + /// violates it (two in-progress) is rejected by the crate mirror exactly as + /// the product `enforce_single_in_progress` would — the divergence the + /// mirror is built to surface. + #[tokio::test] + async fn crate_mirror_rejects_double_in_progress_like_product() { + let dir = tempfile::tempdir().expect("tempdir"); + let store = crate_store_for(dir.path()); + let cards = vec![ + to_crate_card(&oh_card("task-a", OhStatus::InProgress)), + to_crate_card(&oh_card("task-b", OhStatus::InProgress)), + ]; + let err = crate_todos::replace(&store, "user-tasks", cards) + .await + .expect_err("double in-progress must be rejected"); + assert!( + err.to_string().contains("in_progress"), + "unexpected error: {err}" + ); + } + + /// The crate `claim_card` CAS agrees with the legacy claim contract: + /// claiming a `Todo` card to `InProgress` succeeds, and a second claim + /// expecting `Todo` is rejected because the card already moved on. + #[tokio::test] + async fn crate_claim_cas_matches_legacy_contract() { + let dir = tempfile::tempdir().expect("tempdir"); + let store = crate_store_for(dir.path()); + let cards = vec![to_crate_card(&oh_card("task-a", OhStatus::Todo))]; + crate_todos::replace(&store, "user-tasks", cards) + .await + .expect("seed"); + + let expected = [CrateStatus::Todo, CrateStatus::Ready]; + let claimed = crate_todos::claim_card( + &store, + "user-tasks", + "task-a", + &expected, + CrateStatus::InProgress, + ) + .await + .expect("first claim ok"); + assert_eq!(claimed.status, CrateStatus::InProgress); + + // Second claim expecting Todo now loses the CAS — matches the legacy + // "claim rejected" path the dispatcher relies on. + let rejected = crate_todos::claim_card( + &store, + "user-tasks", + "task-a", + &expected, + CrateStatus::InProgress, + ) + .await; + assert!(rejected.is_err(), "stale claim must be rejected"); + } +} diff --git a/src/openhuman/todos/mod.rs b/src/openhuman/todos/mod.rs index 8cb3dc1787..95a8141b4a 100644 --- a/src/openhuman/todos/mod.rs +++ b/src/openhuman/todos/mod.rs @@ -13,6 +13,7 @@ //! `markdown` string so the chat UI / agent transcript can render the //! list directly without re-formatting. +pub mod graph_shadow; pub mod ops; pub mod runs; pub mod schemas; diff --git a/src/openhuman/todos/ops.rs b/src/openhuman/todos/ops.rs index 400b646bcd..aef41bdd5c 100644 --- a/src/openhuman/todos/ops.rs +++ b/src/openhuman/todos/ops.rs @@ -153,7 +153,12 @@ fn save_cards( }; normalise_board(&mut board); let store = TaskBoardStore::new(workspace_dir.clone()); - Ok(store.put(board)?.cards) + let saved = store.put(board)?.cards; + // C2b shadow (adapter-first): mirror the persisted board into the + // vendored crate `graph.todos` store. Fire-and-forget, log-only — + // never affects this authoritative write. + super::graph_shadow::spawn_mirror(location, &saved); + Ok(saved) } BoardLocation::Scratch => { let mut board = TaskBoard { @@ -541,6 +546,55 @@ pub fn claim_card( let _scratch_guard = maybe_scratch_lock(location); let mut cards = load_cards(location)?; + // Snapshot the pre-claim board so the C2b shadow can replay the crate CAS + // against the same state the legacy claim saw (see below). + let pre_cards = cards.clone(); + + // Compute the authoritative outcome without early-returning, so the shadow + // observes the same ok/err verdict (including the not-found/wrong-status + // rejection paths the dispatcher relies on). + let legacy = apply_claim(&mut cards, card_id, expected, target.clone()); + let legacy_ok = legacy.is_ok(); + + let result = match legacy { + Ok(claimed_card) => { + let saved = save_cards(location, cards)?; + emit_progress(location, &saved); + tracing::info!( + card_id = %card_id, + new_status = %claimed_card.status.as_str(), + "[todos][ops] claim_card ok" + ); + Ok(claimed_card) + } + Err(e) => Err(e), + }; + + // Shadow the CAS onto the vendored crate `graph.todos` store (adapter-first, + // log-only). The legacy claim above stays authoritative. + super::graph_shadow::spawn_shadow_claim( + location, + pre_cards, + card_id, + expected.to_vec(), + target, + legacy_ok, + ); + + result +} + +/// Applies a claim to an in-memory card set: find `card_id`, verify its status +/// is in `expected`, transition it to `target`, and enforce the single- +/// `InProgress` invariant. Returns the claimed card (cloned) on success. Does +/// **not** persist — the caller saves the mutated `cards`. Extracted so +/// [`claim_card`] can capture a single ok/err verdict for its crate shadow. +fn apply_claim( + cards: &mut [TaskBoardCard], + card_id: &str, + expected: &[TaskCardStatus], + target: TaskCardStatus, +) -> Result { let card = cards .iter_mut() .find(|c| c.id == card_id) @@ -563,15 +617,7 @@ pub fn claim_card( card.updated_at = Utc::now().to_rfc3339(); let claimed_card = card.clone(); - enforce_single_in_progress(&cards)?; - let cards = save_cards(location, cards)?; - emit_progress(location, &cards); - - tracing::info!( - card_id = %card_id, - new_status = %claimed_card.status.as_str(), - "[todos][ops] claim_card ok" - ); + enforce_single_in_progress(cards)?; Ok(claimed_card) } diff --git a/vendor/tinyagents b/vendor/tinyagents new file mode 160000 index 0000000000..a9500184b3 --- /dev/null +++ b/vendor/tinyagents @@ -0,0 +1 @@ +Subproject commit a9500184b3d6e87e43019e757d4ca622a418b9d9