tinyhumansai · oxoxDev · Jul 2, 2026 · Jul 2, 2026 · Jul 2, 2026 · chatgpt-codex-connector
diff --git a/src/core/observability.rs b/src/core/observability.rs
@@ -1476,6 +1476,21 @@ fn is_backend_user_error_message(lower: &str) -> bool {
 /// classifier survives caller wrapping (rpc.invoke_method, agent.run_single,
 /// `[composio:gmail]` prefixes, anyhow chains, …).
 fn is_provider_user_state_message(lower: &str) -> bool {
+    // TAURI-RUST-HXF: a direct BYO provider (groq `on_demand` free tier)
+    // rejected a *single* request whose token count exceeds the account's
+    // tokens-per-minute cap — `413 Payload Too Large … Request too large …
+    // tokens per minute (TPM): Limit 8000, Requested 42084`. It is permanently
+    // non-viable on the current tier (not a burst that retry/backoff clears)
+    // and OpenHuman cannot raise a third-party account's TPM tier, so it is
+    // user-config state, not a product bug. NOTE: a *managed-backend*
+    // `PAYLOAD_TOO_LARGE` guard-leak is force-captured (returns `None`) earlier
+    // in `expected_error_kind`, before this matcher runs, so this arm only ever
+    // sees direct-provider TPM rejections. Shared matcher (single source of
+    // truth with the subconscious circuit breaker) so the wording can't drift.
+    if crate::openhuman::inference::provider::is_provider_rate_cap_exceeded_message(lower) {
+        return true;
+    }
+
     // OPENHUMAN-TAURI-3R / -3S: composio enable_trigger when the slug isn't
     // in the trigger registry (e.g. user clicked a stale UI option).
     // Backend returns 500 with `"Trigger type GITHUB_PUSH_EVENT not found"`.
@@ -3760,6 +3775,62 @@ mod tests {
         }
     }
 
+    // ── ProviderUserState: permanent TPM rate cap (TAURI-RUST-HXF) ─────────
+
+    #[test]
+    fn classifies_provider_rate_cap_413_tpm_rereport_as_provider_user_state() {
+        // TAURI-RUST-HXF: groq `on_demand` free-tier 413 body — one request
+        // (Requested 42084) exceeds the tokens-per-minute cap (Limit 8000), so
+        // retrying the same request cannot fit. This pins `expected_error_kind`
+        // on the bare provider string (no `agent.run_single` wrapper in the
+        // fixture): it must return `ProviderUserState`, i.e. expected user-config
+        // state (the account tier is not an OpenHuman lever), not a Sentry page.
+        assert_eq!(
+            expected_error_kind(
+                "groq API error (413 Payload Too Large): {\"error\":{\"message\":\"Request too large \
+                 for model `openai/gpt-oss-120b` in organization `org_01k48ewn75ez7tsgw5hmd72px2` \
+                 service tier `on_demand` on tokens per minute (TPM): Limit 8000, Requested 42084. \
+                 Please try again later.\",\"type\":\"tokens\",\"code\":\"rate_limit_exceeded\"}}"
+            ),
+            Some(ExpectedErrorKind::ProviderUserState)
+        );
+    }
+
+    #[test]
+    fn managed_backend_payload_too_large_still_pages_despite_rate_cap_arm() {
+        // Regression pin: a *managed-backend* `PAYLOAD_TOO_LARGE` is a
+        // client-guard leak (the client was supposed to bound the request) and
+        // MUST keep paging (`None`). NOTE(review): this body has neither "request
+        // too large" nor a TPM marker, so it never reaches the new arm's anchors.
+        assert_eq!(
+            expected_error_kind(
+                "OpenHuman API error (413 Payload Too Large): \
+                 {\"error\":{\"errorCode\":\"PAYLOAD_TOO_LARGE\",\"message\":\"request too big\"}}"
+            ),
+            None,
+            "managed PAYLOAD_TOO_LARGE guard-leak must still page"
+        );
+    }
+
+    #[test]
+    fn transient_tpm_burst_and_bare_413_do_not_demote_as_rate_cap() {
+        // The arm requires BOTH "request too large" (single-request permanence)
+        // AND a per-minute-tokens marker. A transient burst ("try again in Ns")
+        // and a bare 413 lacking those anchors must NOT be classified as
+        // ProviderUserState; this test pins only that (not what they map to).
+        for raw in [
+            "groq API error (429 Too Many Requests): Rate limit reached for model \
+             `openai/gpt-oss-120b`. Please try again in 2.5s.",
+            "openai API error (413 Payload Too Large): request entity too large",
+        ] {
+            assert_ne!(
+                expected_error_kind(raw),
+                Some(ExpectedErrorKind::ProviderUserState),
+                "must NOT demote as permanent rate-cap: {raw}"
+            );
+        }
+    }
+
     // ── FilesystemUserPathInvalid (TAURI-RUST-4QH) ─────────────────────────
 
     #[test]

diff --git a/src/openhuman/inference/provider/ops/http_error.rs b/src/openhuman/inference/provider/ops/http_error.rs
@@ -667,6 +667,42 @@ pub fn log_context_window_exceeded(
     );
 }
 
+/// Whether a provider error body is a **permanent per-request rate-cap
+/// rejection**: the provider refused because a *single* request's token count
+/// exceeds the account's tokens-per-minute (TPM) budget, so no amount of
+/// retrying or spacing can ever let it through on the current tier.
+///
+/// Distinct from a *transient* TPM `429` ("rate limit reached … try again in
+/// 2s" — a burst that [`is_context_window_exceeded_message`] and the `reliable`
+/// retry classifier deliberately keep retryable), from a monthly-plan quota
+/// ([`body_indicates_quota_exhausted`]), and from context-window overflow
+/// ([`is_context_window_exceeded_message`], a model-size limit not a rate cap).
+/// Here the request is larger than the per-minute limit outright, so it is
+/// permanently non-viable until the user picks a higher-tier model/provider —
+/// OpenHuman has no lever to raise a third-party account's TPM tier.
+///
+/// Canonical wire shape (groq `on_demand` free tier, Sentry TAURI-RUST-HXF):
+/// `groq API error (413 Payload Too Large): {"error":{"message":"Request too
+/// large for model `openai/gpt-oss-120b` in organization `org_…` service tier
+/// `on_demand` on tokens per minute (TPM): Limit 8000, Requested 42084 …"}}`.
+///
+/// Anchored on BOTH the permanence marker `"request too large"` (a single
+/// request over the cap, not a burst) AND a per-minute-tokens marker
+/// (`"tokens per minute"` / `"(tpm)"`), so a transient "rate limit reached,
+/// retry in Ns" burst — which lacks "request too large" — does NOT match.
+/// Status-agnostic: only the text is inspected, after ASCII-lowercasing it
+/// here, so raw and pre-lowercased input both work. The verbatim-body test
+/// pins the captured wording against matcher regressions; it cannot detect
+/// provider-side wording drift. Single source of truth shared by
+/// `core::observability::is_provider_user_state_message` (private, so no
+/// intra-doc link) and the subconscious tick loop's permanent-rejection
+/// circuit breaker (`crate::openhuman::subconscious::engine`).
+pub fn is_provider_rate_cap_exceeded_message(body: &str) -> bool {
+    let lower = body.to_ascii_lowercase();
+    lower.contains("request too large")
+        && (lower.contains("tokens per minute") || lower.contains("(tpm)"))
+}
+
 /// Whether a provider non-2xx response is the OpenHuman **backend** rejecting
 /// the app session JWT (`401`/`403`). This is expected user-session state
 /// (token expired / revoked / rotated server-side), not a product bug — the
@@ -1225,6 +1261,33 @@ mod tests {
         assert!(body_indicates_quota_exhausted(C9A_BODY));
     }
 
+    #[test]
+    fn rate_cap_exceeded_matches_verbatim_hxf_body_but_not_transient_or_context() {
+        // TAURI-RUST-HXF: groq `on_demand` free-tier 413 body (org id shortened
+        // to `org_x`, trailing fields trimmed) — a single request over the
+        // per-minute token cap. Matches on BOTH "request too large"
+        // (single-request permanence) and a tokens-per-minute marker.
+        assert!(is_provider_rate_cap_exceeded_message(
+            "groq API error (413 Payload Too Large): {\"error\":{\"message\":\"Request too large \
+             for model `openai/gpt-oss-120b` in organization `org_x` service tier `on_demand` on \
+             tokens per minute (TPM): Limit 8000, Requested 42084.\",\"code\":\"rate_limit_exceeded\"}}"
+        ));
+        // Transient burst ("try again in Ns") lacks "request too large" → must
+        // not match.
+        assert!(!is_provider_rate_cap_exceeded_message(
+            "groq API error (429 Too Many Requests): Rate limit reached. Please try again in 2.5s."
+        ));
+        // Context-window overflow is a different bucket (model size, not a rate
+        // cap) — neither anchor is present in this body.
+        assert!(!is_provider_rate_cap_exceeded_message(
+            "openai API error (400): This model's maximum context length is 8192 tokens"
+        ));
+        // A bare 413 with neither anchor ("request entity too large") must not match.
+        assert!(!is_provider_rate_cap_exceeded_message(
+            "openai API error (413 Payload Too Large): request entity too large"
+        ));
+    }
+
     #[test]
     fn quota_exhausted_matches_verbatim_afe_body() {
         // Coverage gap closed (TAURI-RUST-AFE): the Responses `usage_limit_reached`

diff --git a/src/openhuman/inference/provider/ops/mod.rs b/src/openhuman/inference/provider/ops/mod.rs
@@ -27,15 +27,15 @@ pub use http_error::{
     is_openai_oauth_session_expired_http, is_openai_oauth_session_expired_message,
     is_provider_access_policy_denied_http_403, is_provider_config_rejection_http,
     is_provider_insufficient_credits_402, is_provider_moderation_rejection_http_400,
-    is_provider_quota_exhausted, local_provider_no_model_loaded_user_message,
-    log_backend_error_code_owned, log_budget_exhausted_http_400, log_byo_provider_auth_failure,
-    log_context_window_exceeded, log_custom_openai_upstream_bad_request_http_400,
-    log_local_provider_no_model_loaded, log_ollama_cloud_internal_500,
-    log_openai_oauth_session_expired, log_provider_access_policy_denied_http_403,
-    log_provider_config_rejection, log_provider_insufficient_credits_402,
-    log_provider_moderation_rejection, log_provider_quota_exhausted,
-    ollama_cloud_internal_500_user_message, publish_backend_session_expired,
-    should_report_provider_http_failure,
+    is_provider_quota_exhausted, is_provider_rate_cap_exceeded_message,
+    local_provider_no_model_loaded_user_message, log_backend_error_code_owned,
+    log_budget_exhausted_http_400, log_byo_provider_auth_failure, log_context_window_exceeded,
+    log_custom_openai_upstream_bad_request_http_400, log_local_provider_no_model_loaded,
+    log_ollama_cloud_internal_500, log_openai_oauth_session_expired,
+    log_provider_access_policy_denied_http_403, log_provider_config_rejection,
+    log_provider_insufficient_credits_402, log_provider_moderation_rejection,
+    log_provider_quota_exhausted, ollama_cloud_internal_500_user_message,
+    publish_backend_session_expired, should_report_provider_http_failure,
 };
 
 pub use models::{

diff --git a/src/openhuman/subconscious/engine.rs b/src/openhuman/subconscious/engine.rs
@@ -69,6 +69,12 @@ const SUBCONSCIOUS_TOOL_CATALOG: &str = "\
 /// tells the user how to recover. See TAURI-RUST-ADC.
 const TOOL_UNSUPPORTED_REASON: &str = "The selected chat model has no tool-use endpoint, so Subconscious can't run. Pick a tool-capable model in Settings > AI.";
 
+/// Surfaced in [`SubconsciousStatus`] once the rate-cap circuit breaker is
+/// armed, i.e. after a tick failed with a permanent per-minute token cap
+/// (413/TPM). Actionable: the fix is the user's to make (a bigger model/tier),
+/// so the message points there.
+const RATE_CAP_HALT_REASON: &str = "Subconscious is paused: the selected model rejected the request because it exceeds your provider's per-minute token limit. Pick a higher-tier model or provider for Subconscious in Settings > AI > Advanced.";
+
 /// Pick the `TrustedAutomationSource` variant for a subconscious tick.
 ///
 /// Extracted from the engine's `run_agent` body so the origin-escalation
@@ -105,6 +111,64 @@ struct EngineState {
     total_ticks: u64,
     consecutive_failures: u64,
     provider_unavailable_reason: Option<String>,
+    /// Signature of the subconscious provider routing (see
+    /// [`subconscious_provider_signature`]) that is permanently rejecting ticks
+    /// with a per-minute token-cap `413`/TPM. While set and still matching the
+    /// live config, ticks skip the agent run entirely instead of re-firing the
+    /// doomed request every interval (TAURI-RUST-HXF — a permanent provider
+    /// rejection re-reported per tick is the cron-billing-flood family, #3913).
+    /// Cleared automatically when the config's signature changes (the user
+    /// switched the Subconscious model/provider/tier). In-memory only: a restart
+    /// re-probes once, then re-halts on the first rejection — one event per
+    /// launch, not a flood.
+    rate_cap_halt_signature: Option<String>,
+}
+
+impl EngineState {
+    /// Pre-tick gate: compare the stored rate-cap halt with the live provider
+    /// signature. Returns `true` (and bumps `total_ticks`) when the halt was
+    /// armed for this same signature, so the caller must skip the agent run.
+    /// If the stored signature differs, the halt and its matching
+    /// `provider_unavailable_reason` are cleared and `false` is returned; with
+    /// no halt stored it returns `false` untouched. TAURI-RUST-HXF.
+    fn should_skip_for_rate_cap_halt(&mut self, signature: &str) -> bool {
+        match evaluate_rate_cap_halt(self.rate_cap_halt_signature.as_deref(), signature) {
+            RateCapHaltDecision::Skip => {
+                info!(
+                    "[subconscious] halted — the Subconscious provider keeps hitting a permanent \
+                     per-minute token cap (413/TPM); skipping tick until the model/tier changes \
+                     (TAURI-RUST-HXF)"
+                );
+                self.total_ticks += 1;
+                true
+            }
+            RateCapHaltDecision::Resume => {
+                info!(
+                    "[subconscious] Subconscious provider config changed — clearing rate-cap halt \
+                     and resuming ticks"
+                );
+                self.rate_cap_halt_signature = None;
+                if self.provider_unavailable_reason.as_deref() == Some(RATE_CAP_HALT_REASON) {
+                    self.provider_unavailable_reason = None;
+                }
+                false
+            }
+            RateCapHaltDecision::Proceed => false,
+        }
+    }
+
+    /// Arm the rate-cap halt after a tick failed with a permanent per-minute
+    /// token-cap rejection: stores `signature` so later ticks skip until it
+    /// changes, and sets `provider_unavailable_reason` to
+    /// [`RATE_CAP_HALT_REASON`] (overwriting any prior reason). TAURI-RUST-HXF.
+    fn arm_rate_cap_halt(&mut self, signature: &str) {
+        info!(
+            "[subconscious] provider rejected the tick with a permanent per-minute token cap \
+             (413/TPM) — halting until the Subconscious model/tier changes (TAURI-RUST-HXF)"
+        );
+        self.rate_cap_halt_signature = Some(signature.to_string());
+        self.provider_unavailable_reason = Some(RATE_CAP_HALT_REASON.to_string());
+    }
+}
 
 impl SubconsciousEngine {
@@ -142,6 +206,7 @@ impl SubconsciousEngine {
                 total_ticks: 0,
                 consecutive_failures: 0,
                 provider_unavailable_reason: None,
+                rate_cap_halt_signature: None,
             }),
             tick_generation: AtomicU64::new(0),
             tick_lock: Mutex::new(()),
@@ -233,6 +298,20 @@ impl SubconsciousEngine {
             }
         };
 
+        let provider_signature = subconscious_provider_signature(&config);
+        if self
+            .state
+            .lock()
+            .await
+            .should_skip_for_rate_cap_halt(&provider_signature)
+        {
+            return Ok(TickResult {
+                tick_at,
+                duration_ms: started.elapsed().as_millis() as u64,
+                response_chars: 0,
+            });
+        }
+
         if let Some(reason) = subconscious_provider_unavailable_reason(&config) {
             info!("[subconscious] provider unavailable, skipping tick: {reason}");
             let mut state = self.state.lock().await;
@@ -362,6 +441,8 @@ impl SubconsciousEngine {
                         "[subconscious] configured chat model has no tool-use endpoint — Subconscious can't run until the model changes (TAURI-RUST-ADC)"
                     );
                     state.provider_unavailable_reason = Some(TOOL_UNSUPPORTED_REASON.to_string());
+                } else if is_permanent_rate_cap_error(e) {
+                    state.arm_rate_cap_halt(&provider_signature);
                 }
             }
         } else {
@@ -687,6 +768,51 @@ fn resolve_subconscious_route(config: &Config) -> SubconsciousProviderRoute {
     }
 }
 
+/// Stable identity of the Subconscious provider routing, used as the key for
+/// the rate-cap circuit breaker: the halt holds while this string is unchanged
+/// and clears on the next tick that sees a different one. `LocalOllama` embeds
+/// the model and `Other` the raw route string; `OpenHumanCloud` is a constant,
+/// so nothing within that route changes it — NOTE(review): confirm intended.
+fn subconscious_provider_signature(config: &Config) -> String {
+    match resolve_subconscious_route(config) {
+        SubconsciousProviderRoute::LocalOllama { model } => format!("local:{model}"),
+        SubconsciousProviderRoute::OpenHumanCloud => "cloud".to_string(),
+        SubconsciousProviderRoute::Other(raw) => format!("other:{raw}"),
+    }
+}
+
+/// Outcome of comparing an active rate-cap halt against the live provider
+/// signature at the start of a tick. Produced by [`evaluate_rate_cap_halt`],
+/// which takes only string inputs so it can be tested without an engine.
+#[derive(Debug, PartialEq, Eq)]
+enum RateCapHaltDecision {
+    /// A halt is stored and equals the live signature — skip the agent run.
+    Skip,
+    /// A halt is stored but the live signature differs — clear it and resume.
+    Resume,
+    /// No halt is stored — run the tick normally.
+    Proceed,
+}
+
+/// Pure decision for the pre-tick gate: `Skip` when `halt_signature` equals
+/// `current`, `Resume` when it is set but differs, `Proceed` when it is `None`.
+fn evaluate_rate_cap_halt(halt_signature: Option<&str>, current: &str) -> RateCapHaltDecision {
+    match halt_signature {
+        Some(sig) if sig == current => RateCapHaltDecision::Skip,
+        Some(_) => RateCapHaltDecision::Resume,
+        None => RateCapHaltDecision::Proceed,
+    }
+}
+
+/// True when an agent-run error message is a permanent per-minute token-cap
+/// rejection (413/TPM): the request exceeds the provider account's per-minute
+/// budget, so retrying the same tick cannot succeed. Thin wrapper over the
+/// shared provider matcher `is_provider_rate_cap_exceeded_message` (also used
+/// by the Sentry classifier in `core::observability`). TAURI-RUST-HXF.
+fn is_permanent_rate_cap_error(msg: &str) -> bool {
+    crate::openhuman::inference::provider::is_provider_rate_cap_exceeded_message(msg)
+}
+
 /// True when an agent-run error means the configured chat model can't do tool
 /// calls at all — a permanent, user-actionable condition (pick a tool-capable
 /// model). Matches both the direct-provider body (`<model> does not support