From ab3586ede9517c9fd02c12dc0a43bba1a78dcce8 Mon Sep 17 00:00:00 2001
From: oxoxDev <nikhil@tinyhumans.ai>
Date: Thu, 2 Jul 2026 16:16:39 +0530
Subject: [PATCH 1/3] feat(inference): add permanent per-request TPM rate-cap
 matcher (#4404)

Recognize a direct BYO-provider 413 whose single-request token count exceeds
the account's tokens-per-minute cap (groq on_demand free tier). Anchored on
both "request too large" (single-request permanence) and a tokens-per-minute
marker, so a transient 429 burst and context-window overflow stay in their own
buckets. Single source of truth for the Sentry classifier and the subconscious
circuit breaker. Verbatim-body test guards against wording drift.
---
 .../inference/provider/ops/http_error.rs      | 63 +++++++++++++++++++
 src/openhuman/inference/provider/ops/mod.rs   | 18 +++---
 2 files changed, 72 insertions(+), 9 deletions(-)

diff --git a/src/openhuman/inference/provider/ops/http_error.rs b/src/openhuman/inference/provider/ops/http_error.rs
index 46d49c5558..8259e27bd1 100644
--- a/src/openhuman/inference/provider/ops/http_error.rs
+++ b/src/openhuman/inference/provider/ops/http_error.rs
@@ -667,6 +667,42 @@ pub fn log_context_window_exceeded(
     );
 }
 
+/// Whether a provider error body is a **permanent per-request rate-cap
+/// rejection**: the provider refused because a *single* request's token count
+/// exceeds the account's tokens-per-minute (TPM) budget, so no amount of
+/// retrying or spacing can ever let it through on the current tier.
+///
+/// Distinct from a *transient* TPM `429` ("rate limit reached … try again in
+/// 2s" — a burst that [`is_context_window_exceeded_message`] and the `reliable`
+/// retry classifier deliberately keep retryable), from a monthly-plan quota
+/// ([`body_indicates_quota_exhausted`]), and from context-window overflow
+/// ([`is_context_window_exceeded_message`], a model-size limit, not a rate cap).
+/// Here the request is larger than the per-minute limit outright, so it is
+/// permanently non-viable until the user picks a higher-tier model/provider —
+/// OpenHuman has no lever to raise a third-party account's TPM tier.
+///
+/// Canonical wire shape (groq `on_demand` free tier, Sentry TAURI-RUST-HXF):
+/// ``groq API error (413 Payload Too Large): {"error":{"message":"Request too
+/// large for model `openai/gpt-oss-120b` in organization `org_…` service tier
+/// `on_demand` on tokens per minute (TPM): Limit 8000, Requested 42084 …"}}``.
+///
+/// Matches ASCII-case-insensitively on BOTH the permanence marker
+/// `"request too large"` (a single request over the cap, not a burst) AND a
+/// per-minute-tokens marker (`"tokens per minute"` / `"(tpm)"`), so a
+/// transient "rate limit reached, retry in Ns" burst — which lacks "request
+/// too large" — is NOT swallowed and stays retryable + Sentry-visible.
+/// Status-agnostic (groq uses `413`; a gateway could wrap it) and covered by a
+/// verbatim-body test so a provider wording drift fails CI. Single source of
+/// truth shared by `crate::core::observability::is_provider_user_state_message`
+/// (Sentry demotion of the `domain=agent` re-report; a private fn, so named
+/// rather than linked) and the subconscious tick loop's permanent-rejection
+/// circuit breaker (`crate::openhuman::subconscious::engine`).
+pub fn is_provider_rate_cap_exceeded_message(body: &str) -> bool {
+    let lower = body.to_ascii_lowercase();
+    lower.contains("request too large")
+        && (lower.contains("tokens per minute") || lower.contains("(tpm)"))
+}
+
 /// Whether a provider non-2xx response is the OpenHuman **backend** rejecting
 /// the app session JWT (`401`/`403`). This is expected user-session state
 /// (token expired / revoked / rotated server-side), not a product bug — the
@@ -1225,6 +1261,33 @@ mod tests {
         assert!(body_indicates_quota_exhausted(C9A_BODY));
     }
 
+    #[test]
+    fn rate_cap_exceeded_matches_verbatim_hxf_body_but_not_transient_or_context() {
+        // TAURI-RUST-HXF: groq `on_demand` free-tier 413 body (org id redacted,
+        // trailing fields trimmed) — a single request over the per-minute token
+        // cap. Status-agnostic; anchored on BOTH "request too large"
+        // (single-request permanence) and a tokens-per-minute marker.
+        assert!(is_provider_rate_cap_exceeded_message(
+            "groq API error (413 Payload Too Large): {\"error\":{\"message\":\"Request too large \
+             for model `openai/gpt-oss-120b` in organization `org_x` service tier `on_demand` on \
+             tokens per minute (TPM): Limit 8000, Requested 42084.\",\"code\":\"rate_limit_exceeded\"}}"
+        ));
+        // Transient burst ("try again in Ns") lacks "request too large" → stays
+        // retryable + Sentry-visible.
+        assert!(!is_provider_rate_cap_exceeded_message(
+            "groq API error (429 Too Many Requests): Rate limit reached. Please try again in 2.5s."
+        ));
+        // Context-window overflow is a different bucket (model size, not a rate
+        // cap) — no tokens-per-minute marker.
+        assert!(!is_provider_rate_cap_exceeded_message(
+            "openai API error (400): This model's maximum context length is 8192 tokens"
+        ));
+        // A bare 413 with no TPM marker must not match.
+        assert!(!is_provider_rate_cap_exceeded_message(
+            "openai API error (413 Payload Too Large): request entity too large"
+        ));
+    }
+
     #[test]
     fn quota_exhausted_matches_verbatim_afe_body() {
         // Coverage gap closed (TAURI-RUST-AFE): the Responses `usage_limit_reached`
diff --git a/src/openhuman/inference/provider/ops/mod.rs b/src/openhuman/inference/provider/ops/mod.rs
index a4940ee2ad..e920d6ddba 100644
--- a/src/openhuman/inference/provider/ops/mod.rs
+++ b/src/openhuman/inference/provider/ops/mod.rs
@@ -27,15 +27,15 @@ pub use http_error::{
     is_openai_oauth_session_expired_http, is_openai_oauth_session_expired_message,
     is_provider_access_policy_denied_http_403, is_provider_config_rejection_http,
     is_provider_insufficient_credits_402, is_provider_moderation_rejection_http_400,
-    is_provider_quota_exhausted, local_provider_no_model_loaded_user_message,
-    log_backend_error_code_owned, log_budget_exhausted_http_400, log_byo_provider_auth_failure,
-    log_context_window_exceeded, log_custom_openai_upstream_bad_request_http_400,
-    log_local_provider_no_model_loaded, log_ollama_cloud_internal_500,
-    log_openai_oauth_session_expired, log_provider_access_policy_denied_http_403,
-    log_provider_config_rejection, log_provider_insufficient_credits_402,
-    log_provider_moderation_rejection, log_provider_quota_exhausted,
-    ollama_cloud_internal_500_user_message, publish_backend_session_expired,
-    should_report_provider_http_failure,
+    is_provider_quota_exhausted, is_provider_rate_cap_exceeded_message,
+    local_provider_no_model_loaded_user_message, log_backend_error_code_owned,
+    log_budget_exhausted_http_400, log_byo_provider_auth_failure, log_context_window_exceeded,
+    log_custom_openai_upstream_bad_request_http_400, log_local_provider_no_model_loaded,
+    log_ollama_cloud_internal_500, log_openai_oauth_session_expired,
+    log_provider_access_policy_denied_http_403, log_provider_config_rejection,
+    log_provider_insufficient_credits_402, log_provider_moderation_rejection,
+    log_provider_quota_exhausted, ollama_cloud_internal_500_user_message,
+    publish_backend_session_expired, should_report_provider_http_failure,
 };
 
 pub use models::{

From 4bad5d07b357ef613a8564467c1556ad05d20b81 Mon Sep 17 00:00:00 2001
From: oxoxDev <nikhil@tinyhumans.ai>
Date: Thu, 2 Jul 2026 16:16:46 +0530
Subject: [PATCH 2/3] fix(observability): demote direct-provider 413/TPM
 rate-cap (#4404)

TAURI-RUST-HXF: a direct BYO provider (groq on_demand free tier) rejecting a
single request that exceeds the account per-minute token cap is user-config
state OpenHuman cannot lift, not a product bug. Add it to is_provider_user_state_message
so the domain=agent re-report demotes instead of paging. The managed-backend
PAYLOAD_TOO_LARGE guard-leak still force-captures earlier, so this arm only
sees direct-provider TPM rejections. Regression test pins the managed path still
pages and a transient/bare 413 is not demoted.
---
 src/core/observability.rs | 71 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 71 insertions(+)

diff --git a/src/core/observability.rs b/src/core/observability.rs
index 346645d7ca..de9a617d0a 100644
--- a/src/core/observability.rs
+++ b/src/core/observability.rs
@@ -1476,6 +1476,21 @@ fn is_backend_user_error_message(lower: &str) -> bool {
 /// classifier survives caller wrapping (rpc.invoke_method, agent.run_single,
 /// `[composio:gmail]` prefixes, anyhow chains, …).
 fn is_provider_user_state_message(lower: &str) -> bool {
+    // TAURI-RUST-HXF: a direct BYO provider (groq `on_demand` free tier)
+    // rejected a *single* request whose token count exceeds the account's
+    // tokens-per-minute cap — `413 Payload Too Large … Request too large …
+    // tokens per minute (TPM): Limit 8000, Requested 42084`. It is permanently
+    // non-viable on the current tier (not a burst that retry/backoff clears)
+    // and OpenHuman cannot raise a third-party account's TPM tier, so it is
+    // user-config state, not a product bug. NOTE: a *managed-backend*
+    // `PAYLOAD_TOO_LARGE` guard-leak is force-captured (returns `None`) earlier
+    // in `expected_error_kind`, before this matcher runs, so this arm only ever
+    // sees direct-provider TPM rejections. Shared matcher (single source of
+    // truth with the subconscious circuit breaker) so the wording can't drift.
+    if crate::openhuman::inference::provider::is_provider_rate_cap_exceeded_message(lower) {
+        return true;
+    }
+
     // OPENHUMAN-TAURI-3R / -3S: composio enable_trigger when the slug isn't
     // in the trigger registry (e.g. user clicked a stale UI option).
     // Backend returns 500 with `"Trigger type GITHUB_PUSH_EVENT not found"`.
@@ -3760,6 +3775,62 @@ mod tests {
         }
     }
 
+    // ── ProviderUserState: permanent TPM rate cap (TAURI-RUST-HXF) ─────────
+
+    #[test]
+    fn classifies_provider_rate_cap_413_tpm_rereport_as_provider_user_state() {
+        // TAURI-RUST-HXF: groq `on_demand` free-tier body (org id redacted) — a single
+        // subconscious request (42084 tokens) exceeds the 8000 tokens-per-minute
+        // cap, so groq returns 413 and no retry can ever fit it. When re-raised
+        // by `agent.run_single` under `domain=agent`, `report_error_or_expected`
+        // must demote it to expected user-config state (the user's account tier
+        // is not a lever OpenHuman controls) instead of paging Sentry.
+        assert_eq!(
+            expected_error_kind(
+                "groq API error (413 Payload Too Large): {\"error\":{\"message\":\"Request too large \
+                 for model `openai/gpt-oss-120b` in organization `org_x` \
+                 service tier `on_demand` on tokens per minute (TPM): Limit 8000, Requested 42084. \
+                 Please try again later.\",\"type\":\"tokens\",\"code\":\"rate_limit_exceeded\"}}"
+            ),
+            Some(ExpectedErrorKind::ProviderUserState)
+        );
+    }
+
+    #[test]
+    fn managed_backend_payload_too_large_still_pages_despite_rate_cap_arm() {
+        // Regression pin: a *managed-backend* `PAYLOAD_TOO_LARGE` is a
+        // client-guard leak (the client was supposed to bound the request) and
+        // MUST keep paging. NOTE(review): this body has no "request too large" /
+        // TPM marker, so it cannot match the TPM arm regardless of arm order.
+        assert_eq!(
+            expected_error_kind(
+                "OpenHuman API error (413 Payload Too Large): \
+                 {\"error\":{\"errorCode\":\"PAYLOAD_TOO_LARGE\",\"message\":\"request too big\"}}"
+            ),
+            None,
+            "managed PAYLOAD_TOO_LARGE guard-leak must still page"
+        );
+    }
+
+    #[test]
+    fn transient_tpm_burst_and_bare_413_do_not_demote_as_rate_cap() {
+        // The arm requires BOTH "request too large" (single-request permanence)
+        // AND a per-minute-tokens marker. A transient burst ("try again in Ns")
+        // and a bare 413 lacking those anchors must NOT be demoted to
+        // ProviderUserState — they stay retryable / Sentry-visible.
+        for raw in [
+            "groq API error (429 Too Many Requests): Rate limit reached for model \
+             `openai/gpt-oss-120b`. Please try again in 2.5s.",
+            "openai API error (413 Payload Too Large): request entity too large",
+        ] {
+            assert_ne!(
+                expected_error_kind(raw),
+                Some(ExpectedErrorKind::ProviderUserState),
+                "must NOT demote as permanent rate-cap: {raw}"
+            );
+        }
+    }
+
     // ── FilesystemUserPathInvalid (TAURI-RUST-4QH) ─────────────────────────
 
     #[test]

From a35309d2afffe90ce34d4879c920a81d34e5d8b1 Mon Sep 17 00:00:00 2001
From: oxoxDev <nikhil@tinyhumans.ai>
Date: Thu, 2 Jul 2026 16:16:53 +0530
Subject: [PATCH 3/3] fix(subconscious): halt ticks on permanent provider
 rate-cap 413 (#4404)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

TAURI-RUST-HXF: when a tick's provider config keeps rejecting with a permanent
per-minute token cap (413/TPM), the loop re-fired the doomed request every
5-30 min and re-reported it — 2232 events from one user, the cron-billing-flood
family (#3913). Add a circuit breaker keyed on the Subconscious provider
signature: on a permanent rate-cap agent error, halt the agent run; skip
subsequent ticks while the same config is set; auto-clear the moment the user
switches model/provider/tier. Mirrors the existing tool-capability
(TAURI-RUST-ADC) permanent-failure arm. In-memory only — a restart re-probes
once, then re-halts. Pure helpers unit-tested.
---
 src/openhuman/subconscious/engine.rs       | 126 +++++++++++++++++++++
 src/openhuman/subconscious/engine_tests.rs |  89 +++++++++++++++
 2 files changed, 215 insertions(+)

diff --git a/src/openhuman/subconscious/engine.rs b/src/openhuman/subconscious/engine.rs
index 0d08761aab..ef56694732 100644
--- a/src/openhuman/subconscious/engine.rs
+++ b/src/openhuman/subconscious/engine.rs
@@ -69,6 +69,12 @@ const SUBCONSCIOUS_TOOL_CATALOG: &str = "\
 /// tells the user how to recover. See TAURI-RUST-ADC.
 const TOOL_UNSUPPORTED_REASON: &str = "The selected chat model has no tool-use endpoint, so Subconscious can't run. Pick a tool-capable model in Settings > AI.";
 
+/// Surfaced in [`SubconsciousStatus`] when the circuit breaker has halted ticks
+/// because the configured Subconscious model keeps rejecting requests with a
+/// permanent per-minute token cap (413/TPM). Actionable: the fix is the user's
+/// to make (a bigger model/tier), so the message points there.
+const RATE_CAP_HALT_REASON: &str = "Subconscious is paused: the selected model rejected the request because it exceeds your provider's per-minute token limit. Pick a higher-tier model or provider for Subconscious in Settings > AI > Advanced.";
+
 /// Pick the `TrustedAutomationSource` variant for a subconscious tick.
 ///
 /// Extracted from the engine's `run_agent` body so the origin-escalation
@@ -105,6 +111,64 @@ struct EngineState {
     total_ticks: u64,
     consecutive_failures: u64,
     provider_unavailable_reason: Option<String>,
+    /// Signature of the subconscious provider routing (see
+    /// [`subconscious_provider_signature`]) that is permanently rejecting ticks
+    /// with a per-minute token-cap `413`/TPM. While set and still matching the
+    /// live config, ticks skip the agent run entirely instead of re-firing the
+    /// doomed request every interval (TAURI-RUST-HXF — a permanent provider
+    /// rejection re-reported per tick is the cron-billing-flood family, #3913).
+    /// Cleared automatically when the config's signature changes (the user
+    /// switched the Subconscious model/provider/tier). In-memory only: a restart
+    /// re-probes once, then re-halts on the first rejection — one event per
+    /// launch, not a flood.
+    rate_cap_halt_signature: Option<String>,
+}
+
+impl EngineState {
+    /// Pre-tick gate: consult the rate-cap halt against the live provider
+    /// signature. Returns `true` when the tick must skip the agent run because a
+    /// halt is active for the still-current config. A halt whose signature no
+    /// longer matches (the user switched Subconscious model/provider/tier) is
+    /// cleared here and the tick proceeds. Counts a skipped tick so status stays
+    /// accurate. TAURI-RUST-HXF.
+    fn should_skip_for_rate_cap_halt(&mut self, signature: &str) -> bool {
+        match evaluate_rate_cap_halt(self.rate_cap_halt_signature.as_deref(), signature) {
+            RateCapHaltDecision::Skip => {
+                info!(
+                    "[subconscious] halted — the Subconscious provider keeps hitting a permanent \
+                     per-minute token cap (413/TPM); skipping tick until the model/tier changes \
+                     (TAURI-RUST-HXF)"
+                );
+                self.total_ticks += 1;
+                true
+            }
+            RateCapHaltDecision::Resume => {
+                info!(
+                    "[subconscious] Subconscious provider config changed — clearing rate-cap halt \
+                     and resuming ticks"
+                );
+                self.rate_cap_halt_signature = None;
+                if self.provider_unavailable_reason.as_deref() == Some(RATE_CAP_HALT_REASON) {
+                    self.provider_unavailable_reason = None;
+                }
+                false
+            }
+            RateCapHaltDecision::Proceed => false,
+        }
+    }
+
+    /// Arm the rate-cap halt after a tick failed with a permanent per-minute
+    /// token-cap rejection, so subsequent ticks skip until the provider
+    /// signature changes. Surfaces an actionable reason in
+    /// [`SubconsciousStatus`]. TAURI-RUST-HXF.
+    fn arm_rate_cap_halt(&mut self, signature: &str) {
+        info!(
+            "[subconscious] provider rejected the tick with a permanent per-minute token cap \
+             (413/TPM) — halting until the Subconscious model/tier changes (TAURI-RUST-HXF)"
+        );
+        self.rate_cap_halt_signature = Some(signature.to_string());
+        self.provider_unavailable_reason = Some(RATE_CAP_HALT_REASON.to_string());
+    }
 }
 
 impl SubconsciousEngine {
@@ -142,6 +206,7 @@ impl SubconsciousEngine {
                 total_ticks: 0,
                 consecutive_failures: 0,
                 provider_unavailable_reason: None,
+                rate_cap_halt_signature: None,
             }),
             tick_generation: AtomicU64::new(0),
             tick_lock: Mutex::new(()),
@@ -233,6 +298,20 @@ impl SubconsciousEngine {
             }
         };
 
+        let provider_signature = subconscious_provider_signature(&config);
+        if self
+            .state
+            .lock()
+            .await
+            .should_skip_for_rate_cap_halt(&provider_signature)
+        {
+            return Ok(TickResult {
+                tick_at,
+                duration_ms: started.elapsed().as_millis() as u64,
+                response_chars: 0,
+            });
+        }
+
         if let Some(reason) = subconscious_provider_unavailable_reason(&config) {
             info!("[subconscious] provider unavailable, skipping tick: {reason}");
             let mut state = self.state.lock().await;
@@ -362,6 +441,8 @@ impl SubconsciousEngine {
                         "[subconscious] configured chat model has no tool-use endpoint — Subconscious can't run until the model changes (TAURI-RUST-ADC)"
                     );
                     state.provider_unavailable_reason = Some(TOOL_UNSUPPORTED_REASON.to_string());
+                } else if is_permanent_rate_cap_error(e) {
+                    state.arm_rate_cap_halt(&provider_signature);
                 }
             }
         } else {
@@ -687,6 +768,51 @@ fn resolve_subconscious_route(config: &Config) -> SubconsciousProviderRoute {
     }
 }
 
+/// Stable identity of the Subconscious provider routing — the exact knobs a
+/// user changes in Settings > AI > Advanced to switch the tick model/provider.
+/// The rate-cap circuit breaker keys its halt on this so a permanent per-minute
+/// token-cap rejection stops re-firing while the SAME route is set, and
+/// auto-clears once the route changes (the `cloud` route has one fixed value).
+fn subconscious_provider_signature(config: &Config) -> String {
+    match resolve_subconscious_route(config) {
+        SubconsciousProviderRoute::LocalOllama { model } => format!("local:{model}"),
+        SubconsciousProviderRoute::OpenHumanCloud => "cloud".to_string(),
+        SubconsciousProviderRoute::Other(raw) => format!("other:{raw}"),
+    }
+}
+
+/// Outcome of comparing an active rate-cap halt against the live provider
+/// signature at the start of a tick. Pure so it is unit-testable without
+/// spinning an engine/agent.
+#[derive(Debug, PartialEq, Eq)]
+enum RateCapHaltDecision {
+    /// A halt is set for the same signature still in config — skip the run.
+    Skip,
+    /// A halt is set but the signature changed — clear it and resume ticking.
+    Resume,
+    /// No halt in effect — run the tick normally.
+    Proceed,
+}
+
+/// Decide whether a tick should skip, resume, or proceed given the stored
+/// rate-cap halt signature (if any) and the live provider signature.
+fn evaluate_rate_cap_halt(halt_signature: Option<&str>, current: &str) -> RateCapHaltDecision {
+    match halt_signature {
+        Some(sig) if sig == current => RateCapHaltDecision::Skip,
+        Some(_) => RateCapHaltDecision::Resume,
+        None => RateCapHaltDecision::Proceed,
+    }
+}
+
+/// True when an agent-run error is a permanent per-minute token-cap rejection
+/// (413/TPM) — the request is larger than the provider account's per-minute
+/// budget, so retrying the same tick can never succeed. Delegates to the shared
+/// provider matcher (single source of truth with the Sentry classifier in
+/// `core::observability`) so the wording can't drift. TAURI-RUST-HXF.
+fn is_permanent_rate_cap_error(msg: &str) -> bool {
+    crate::openhuman::inference::provider::is_provider_rate_cap_exceeded_message(msg)
+}
+
 /// True when an agent-run error means the configured chat model can't do tool
 /// calls at all — a permanent, user-actionable condition (pick a tool-capable
 /// model). Matches both the direct-provider body (`<model> does not support
diff --git a/src/openhuman/subconscious/engine_tests.rs b/src/openhuman/subconscious/engine_tests.rs
index 2d7f978df4..62588132ab 100644
--- a/src/openhuman/subconscious/engine_tests.rs
+++ b/src/openhuman/subconscious/engine_tests.rs
@@ -152,3 +152,92 @@ fn render_world_diff_caps_items_and_falls_back_to_item_id() {
     assert!(rendered.contains("[added] item_0"), "uses item_id fallback");
     assert!(rendered.contains("…and 3 more"), "caps the per-source list");
 }
+
+// ── Rate-cap circuit breaker (TAURI-RUST-HXF) ───────────────────────────
+
+#[test]
+fn evaluate_rate_cap_halt_skip_resume_proceed() {
+    // No halt in effect → run normally.
+    assert_eq!(
+        evaluate_rate_cap_halt(None, "other:groq"),
+        RateCapHaltDecision::Proceed
+    );
+    // Halt set for the same signature still in config → skip the doomed run.
+    assert_eq!(
+        evaluate_rate_cap_halt(Some("other:groq"), "other:groq"),
+        RateCapHaltDecision::Skip
+    );
+    // Halt set but the user switched provider/model → clear it and resume.
+    assert_eq!(
+        evaluate_rate_cap_halt(Some("other:groq"), "cloud"),
+        RateCapHaltDecision::Resume
+    );
+}
+
+#[test]
+fn permanent_rate_cap_error_matches_wrapped_groq_agent_error_only() {
+    // A wrapped agent-run error in the shape the tick surfaces (413/TPM, org id
+    // redacted) → permanent, so the breaker halts.
+    assert!(is_permanent_rate_cap_error(
+        r#"agent run: groq API error (413 Payload Too Large): {"error":{"message":"Request too large for model `openai/gpt-oss-120b` in organization `org_x` service tier `on_demand` on tokens per minute (TPM): Limit 8000, Requested 42084."}}"#
+    ));
+    // A transient 429 burst ("try again in Ns") must NOT halt — it lacks the
+    // "request too large" anchor and stays retryable.
+    assert!(!is_permanent_rate_cap_error(
+        "agent run: groq API error (429 Too Many Requests): Rate limit reached. Please try again in 2.5s."
+    ));
+    // A tool-capability error is a different permanent condition handled by its
+    // own arm, not the rate-cap breaker — the two permanent arms never overlap.
+    assert!(!is_permanent_rate_cap_error(
+        "agent run: No endpoints found that support tool use"
+    ));
+}
+
+#[test]
+fn subconscious_provider_signature_tracks_config_changes() {
+    // Default config routes to OpenHuman cloud.
+    let mut cfg = Config::default();
+    assert_eq!(subconscious_provider_signature(&cfg), "cloud");
+
+    // A BYO provider override yields a distinct, stable signature.
+    cfg.subconscious_provider = Some("groq".to_string());
+    let groq_sig = subconscious_provider_signature(&cfg);
+    assert_eq!(groq_sig, "other:groq");
+
+    // Switching the provider changes the signature — the breaker's cue to
+    // clear a halt and resume ticking.
+    cfg.subconscious_provider = Some("openai".to_string());
+    assert_ne!(subconscious_provider_signature(&cfg), groq_sig);
+}
+
+#[test]
+fn rate_cap_halt_state_transitions() {
+    let mut state = EngineState {
+        last_tick_at: 0.0,
+        total_ticks: 0,
+        consecutive_failures: 0,
+        provider_unavailable_reason: None,
+        rate_cap_halt_signature: None,
+    };
+
+    // No halt armed → the tick proceeds (does not skip).
+    assert!(!state.should_skip_for_rate_cap_halt("other:groq"));
+
+    // A permanent rate-cap failure arms the halt + actionable reason.
+    state.arm_rate_cap_halt("other:groq");
+    assert_eq!(state.rate_cap_halt_signature.as_deref(), Some("other:groq"));
+    assert_eq!(
+        state.provider_unavailable_reason.as_deref(),
+        Some(RATE_CAP_HALT_REASON)
+    );
+
+    // Same config still set → skip the doomed run, and count the skipped tick.
+    let before = state.total_ticks;
+    assert!(state.should_skip_for_rate_cap_halt("other:groq"));
+    assert_eq!(state.total_ticks, before + 1);
+
+    // User switched provider (signature changed) → clear halt + reason, resume.
+    assert!(!state.should_skip_for_rate_cap_halt("cloud"));
+    assert!(state.rate_cap_halt_signature.is_none());
+    assert!(state.provider_unavailable_reason.is_none());
+}