From ab3586ede9517c9fd02c12dc0a43bba1a78dcce8 Mon Sep 17 00:00:00 2001 From: oxoxDev Date: Thu, 2 Jul 2026 16:16:39 +0530 Subject: [PATCH 1/3] feat(inference): add permanent per-request TPM rate-cap matcher (#4404) Recognize a direct BYO-provider 413 whose single-request token count exceeds the account's tokens-per-minute cap (groq on_demand free tier). Anchored on both "request too large" (single-request permanence) and a tokens-per-minute marker, so a transient 429 burst and context-window overflow stay in their own buckets. Single source of truth for the Sentry classifier and the subconscious circuit breaker. Verbatim-body test guards against wording drift. --- .../inference/provider/ops/http_error.rs | 63 +++++++++++++++++++ src/openhuman/inference/provider/ops/mod.rs | 18 +++--- 2 files changed, 72 insertions(+), 9 deletions(-) diff --git a/src/openhuman/inference/provider/ops/http_error.rs b/src/openhuman/inference/provider/ops/http_error.rs index 46d49c5558..8259e27bd1 100644 --- a/src/openhuman/inference/provider/ops/http_error.rs +++ b/src/openhuman/inference/provider/ops/http_error.rs @@ -667,6 +667,42 @@ pub fn log_context_window_exceeded( ); } +/// Whether a provider error body is a **permanent per-request rate-cap +/// rejection**: the provider refused because a *single* request's token count +/// exceeds the account's tokens-per-minute (TPM) budget, so no amount of +/// retrying or spacing can ever let it through on the current tier. +/// +/// Distinct from a *transient* TPM `429` ("rate limit reached … try again in +/// 2s" — a burst that [`is_context_window_exceeded_message`] and the `reliable` +/// retry classifier deliberately keep retryable), from a monthly-plan quota +/// ([`body_indicates_quota_exhausted`]), and from context-window overflow +/// ([`is_context_window_exceeded_message`], a model-size limit not a rate cap). 
+/// Here the request is larger than the per-minute limit outright, so it is +/// permanently non-viable until the user picks a higher-tier model/provider — +/// OpenHuman has no lever to raise a third-party account's TPM tier. +/// +/// Canonical wire shape (groq `on_demand` free tier, Sentry TAURI-RUST-HXF): +/// `groq API error (413 Payload Too Large): {"error":{"message":"Request too +/// large for model `openai/gpt-oss-120b` in organization `org_…` service tier +/// `on_demand` on tokens per minute (TPM): Limit 8000, Requested 42084 …"}}`. +/// +/// Anchored on BOTH the permanence marker `"request too large"` (a single +/// request over the cap, not a burst) AND a per-minute-tokens marker +/// (`"tokens per minute"` / `"(tpm)"`), so a transient "rate limit reached, +/// retry in Ns" burst — which lacks "request too large" — is NOT swallowed and +/// stays retryable + Sentry-visible. Status-agnostic (groq uses `413`; a +/// gateway could wrap it) and covered by a verbatim-body test so a provider +/// wording drift fails CI. Single source of truth shared by +/// [`crate::core::observability::is_provider_user_state_message`] (Sentry +/// demotion of the `domain=agent` re-report) and the subconscious tick loop's +/// permanent-rejection circuit breaker +/// (`crate::openhuman::subconscious::engine`). +pub fn is_provider_rate_cap_exceeded_message(body: &str) -> bool { + let lower = body.to_ascii_lowercase(); + lower.contains("request too large") + && (lower.contains("tokens per minute") || lower.contains("(tpm)")) +} + /// Whether a provider non-2xx response is the OpenHuman **backend** rejecting /// the app session JWT (`401`/`403`). 
This is expected user-session state /// (token expired / revoked / rotated server-side), not a product bug — the @@ -1225,6 +1261,33 @@ mod tests { assert!(body_indicates_quota_exhausted(C9A_BODY)); } + #[test] + fn rate_cap_exceeded_matches_verbatim_hxf_body_but_not_transient_or_context() { + // TAURI-RUST-HXF: verbatim groq `on_demand` free-tier 413 — a single + // request over the per-minute token cap. Status-agnostic; anchored on + // BOTH "request too large" (single-request permanence) and a + // tokens-per-minute marker. + assert!(is_provider_rate_cap_exceeded_message( + "groq API error (413 Payload Too Large): {\"error\":{\"message\":\"Request too large \ + for model `openai/gpt-oss-120b` in organization `org_x` service tier `on_demand` on \ + tokens per minute (TPM): Limit 8000, Requested 42084.\",\"code\":\"rate_limit_exceeded\"}}" + )); + // Transient burst ("try again in Ns") lacks "request too large" → stays + // retryable + Sentry-visible. + assert!(!is_provider_rate_cap_exceeded_message( + "groq API error (429 Too Many Requests): Rate limit reached. Please try again in 2.5s." + )); + // Context-window overflow is a different bucket (model size, not a rate + // cap) — no tokens-per-minute marker. + assert!(!is_provider_rate_cap_exceeded_message( + "openai API error (400): This model's maximum context length is 8192 tokens" + )); + // A bare 413 with no TPM marker must not match. 
+ assert!(!is_provider_rate_cap_exceeded_message( + "openai API error (413 Payload Too Large): request entity too large" + )); + } + #[test] fn quota_exhausted_matches_verbatim_afe_body() { // Coverage gap closed (TAURI-RUST-AFE): the Responses `usage_limit_reached` diff --git a/src/openhuman/inference/provider/ops/mod.rs b/src/openhuman/inference/provider/ops/mod.rs index a4940ee2ad..e920d6ddba 100644 --- a/src/openhuman/inference/provider/ops/mod.rs +++ b/src/openhuman/inference/provider/ops/mod.rs @@ -27,15 +27,15 @@ pub use http_error::{ is_openai_oauth_session_expired_http, is_openai_oauth_session_expired_message, is_provider_access_policy_denied_http_403, is_provider_config_rejection_http, is_provider_insufficient_credits_402, is_provider_moderation_rejection_http_400, - is_provider_quota_exhausted, local_provider_no_model_loaded_user_message, - log_backend_error_code_owned, log_budget_exhausted_http_400, log_byo_provider_auth_failure, - log_context_window_exceeded, log_custom_openai_upstream_bad_request_http_400, - log_local_provider_no_model_loaded, log_ollama_cloud_internal_500, - log_openai_oauth_session_expired, log_provider_access_policy_denied_http_403, - log_provider_config_rejection, log_provider_insufficient_credits_402, - log_provider_moderation_rejection, log_provider_quota_exhausted, - ollama_cloud_internal_500_user_message, publish_backend_session_expired, - should_report_provider_http_failure, + is_provider_quota_exhausted, is_provider_rate_cap_exceeded_message, + local_provider_no_model_loaded_user_message, log_backend_error_code_owned, + log_budget_exhausted_http_400, log_byo_provider_auth_failure, log_context_window_exceeded, + log_custom_openai_upstream_bad_request_http_400, log_local_provider_no_model_loaded, + log_ollama_cloud_internal_500, log_openai_oauth_session_expired, + log_provider_access_policy_denied_http_403, log_provider_config_rejection, + log_provider_insufficient_credits_402, log_provider_moderation_rejection, + 
log_provider_quota_exhausted, ollama_cloud_internal_500_user_message, + publish_backend_session_expired, should_report_provider_http_failure, }; pub use models::{ From 4bad5d07b357ef613a8564467c1556ad05d20b81 Mon Sep 17 00:00:00 2001 From: oxoxDev Date: Thu, 2 Jul 2026 16:16:46 +0530 Subject: [PATCH 2/3] fix(observability): demote direct-provider 413/TPM rate-cap (#4404) TAURI-RUST-HXF: a direct BYO provider (groq on_demand free tier) rejecting a single request that exceeds the account per-minute token cap is user-config state OpenHuman cannot lift, not a product bug. Add it to is_provider_user_state_message so the domain=agent re-report demotes instead of paging. The managed-backend PAYLOAD_TOO_LARGE guard-leak still force-captures earlier, so this arm only sees direct-provider TPM rejections. Regression test pins the managed path still pages and a transient/bare 413 is not demoted. --- src/core/observability.rs | 71 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/src/core/observability.rs b/src/core/observability.rs index 346645d7ca..de9a617d0a 100644 --- a/src/core/observability.rs +++ b/src/core/observability.rs @@ -1476,6 +1476,21 @@ fn is_backend_user_error_message(lower: &str) -> bool { /// classifier survives caller wrapping (rpc.invoke_method, agent.run_single, /// `[composio:gmail]` prefixes, anyhow chains, …). fn is_provider_user_state_message(lower: &str) -> bool { + // TAURI-RUST-HXF: a direct BYO provider (groq `on_demand` free tier) + // rejected a *single* request whose token count exceeds the account's + // tokens-per-minute cap — `413 Payload Too Large … Request too large … + // tokens per minute (TPM): Limit 8000, Requested 42084`. It is permanently + // non-viable on the current tier (not a burst that retry/backoff clears) + // and OpenHuman cannot raise a third-party account's TPM tier, so it is + // user-config state, not a product bug. 
NOTE: a *managed-backend* + // `PAYLOAD_TOO_LARGE` guard-leak is force-captured (returns `None`) earlier + // in `expected_error_kind`, before this matcher runs, so this arm only ever + // sees direct-provider TPM rejections. Shared matcher (single source of + // truth with the subconscious circuit breaker) so the wording can't drift. + if crate::openhuman::inference::provider::is_provider_rate_cap_exceeded_message(lower) { + return true; + } + // OPENHUMAN-TAURI-3R / -3S: composio enable_trigger when the slug isn't // in the trigger registry (e.g. user clicked a stale UI option). // Backend returns 500 with `"Trigger type GITHUB_PUSH_EVENT not found"`. @@ -3760,6 +3775,62 @@ mod tests { } } + // ── ProviderUserState: permanent TPM rate cap (TAURI-RUST-HXF) ───────── + + #[test] + fn classifies_provider_rate_cap_413_tpm_rereport_as_provider_user_state() { + // TAURI-RUST-HXF: verbatim groq `on_demand` free-tier body — a single + // subconscious request (42084 tokens) exceeds the 8000 tokens-per-minute + // cap, so groq returns 413 and no retry can ever fit it. When re-raised + // by `agent.run_single` under `domain=agent`, `report_error_or_expected` + // must demote it to expected user-config state (the user's account tier + // is not a lever OpenHuman controls) instead of paging Sentry. + assert_eq!( + expected_error_kind( + "groq API error (413 Payload Too Large): {\"error\":{\"message\":\"Request too large \ + for model `openai/gpt-oss-120b` in organization `org_01k48ewn75ez7tsgw5hmd72px2` \ + service tier `on_demand` on tokens per minute (TPM): Limit 8000, Requested 42084. \ + Please try again later.\",\"type\":\"tokens\",\"code\":\"rate_limit_exceeded\"}}" + ), + Some(ExpectedErrorKind::ProviderUserState) + ); + } + + #[test] + fn managed_backend_payload_too_large_still_pages_despite_rate_cap_arm() { + // Regression pin: a *managed-backend* `PAYLOAD_TOO_LARGE` is a + // client-guard leak (the client was supposed to bound the request) and + // MUST keep paging. 
The guard-leak arm returns `None` before the + // ProviderUserState matcher runs, so the new TPM arm cannot demote it. + assert_eq!( + expected_error_kind( + "OpenHuman API error (413 Payload Too Large): \ + {\"error\":{\"errorCode\":\"PAYLOAD_TOO_LARGE\",\"message\":\"request too big\"}}" + ), + None, + "managed PAYLOAD_TOO_LARGE guard-leak must still page" + ); + } + + #[test] + fn transient_tpm_burst_and_bare_413_do_not_demote_as_rate_cap() { + // The arm requires BOTH "request too large" (single-request permanence) + // AND a per-minute-tokens marker. A transient burst ("try again in Ns") + // and a bare 413 lacking those anchors must NOT be demoted to + // ProviderUserState — they stay retryable / Sentry-visible. + for raw in [ + "groq API error (429 Too Many Requests): Rate limit reached for model \ + `openai/gpt-oss-120b`. Please try again in 2.5s.", + "openai API error (413 Payload Too Large): request entity too large", + ] { + assert_ne!( + expected_error_kind(raw), + Some(ExpectedErrorKind::ProviderUserState), + "must NOT demote as permanent rate-cap: {raw}" + ); + } + } + // ── FilesystemUserPathInvalid (TAURI-RUST-4QH) ───────────────────────── #[test] From a35309d2afffe90ce34d4879c920a81d34e5d8b1 Mon Sep 17 00:00:00 2001 From: oxoxDev Date: Thu, 2 Jul 2026 16:16:53 +0530 Subject: [PATCH 3/3] fix(subconscious): halt ticks on permanent provider rate-cap 413 (#4404) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit TAURI-RUST-HXF: when a tick's provider config keeps rejecting with a permanent per-minute token cap (413/TPM), the loop re-fired the doomed request every 5-30 min and re-reported it — 2232 events from one user, the cron-billing-flood family (#3913). Add a circuit breaker keyed on the Subconscious provider signature: on a permanent rate-cap agent error, halt the agent run; skip subsequent ticks while the same config is set; auto-clear the moment the user switches model/provider/tier. 
Mirrors the existing tool-capability (TAURI-RUST-ADC) permanent-failure arm. In-memory only — a restart re-probes once, then re-halts. Pure helpers unit-tested. --- src/openhuman/subconscious/engine.rs | 126 +++++++++++++++++++++ src/openhuman/subconscious/engine_tests.rs | 89 +++++++++++++++ 2 files changed, 215 insertions(+) diff --git a/src/openhuman/subconscious/engine.rs b/src/openhuman/subconscious/engine.rs index 0d08761aab..ef56694732 100644 --- a/src/openhuman/subconscious/engine.rs +++ b/src/openhuman/subconscious/engine.rs @@ -69,6 +69,12 @@ const SUBCONSCIOUS_TOOL_CATALOG: &str = "\ /// tells the user how to recover. See TAURI-RUST-ADC. const TOOL_UNSUPPORTED_REASON: &str = "The selected chat model has no tool-use endpoint, so Subconscious can't run. Pick a tool-capable model in Settings > AI."; +/// Surfaced in [`SubconsciousStatus`] when the circuit breaker has halted ticks +/// because the configured Subconscious model keeps rejecting requests with a +/// permanent per-minute token cap (413/TPM). Actionable: the fix is the user's +/// to make (a bigger model/tier), so the message points there. +const RATE_CAP_HALT_REASON: &str = "Subconscious is paused: the selected model rejected the request because it exceeds your provider's per-minute token limit. Pick a higher-tier model or provider for Subconscious in Settings > AI > Advanced."; + /// Pick the `TrustedAutomationSource` variant for a subconscious tick. /// /// Extracted from the engine's `run_agent` body so the origin-escalation @@ -105,6 +111,64 @@ struct EngineState { total_ticks: u64, consecutive_failures: u64, provider_unavailable_reason: Option<String>, + /// Signature of the subconscious provider routing (see + /// [`subconscious_provider_signature`]) that is permanently rejecting ticks + /// with a per-minute token-cap `413`/TPM. 
While set and still matching the + /// live config, ticks skip the agent run entirely instead of re-firing the + /// doomed request every interval (TAURI-RUST-HXF — a permanent provider + /// rejection re-reported per tick is the cron-billing-flood family, #3913). + /// Cleared automatically when the config's signature changes (the user + /// switched the Subconscious model/provider/tier). In-memory only: a restart + /// re-probes once, then re-halts on the first rejection — one event per + /// launch, not a flood. + rate_cap_halt_signature: Option<String>, +} + +impl EngineState { + /// Pre-tick gate: consult the rate-cap halt against the live provider + /// signature. Returns `true` when the tick must skip the agent run because a + /// halt is active for the still-current config. A halt whose signature no + /// longer matches (the user switched Subconscious model/provider/tier) is + /// cleared here and the tick proceeds. Counts a skipped tick so status stays + /// accurate. TAURI-RUST-HXF. + fn should_skip_for_rate_cap_halt(&mut self, signature: &str) -> bool { + match evaluate_rate_cap_halt(self.rate_cap_halt_signature.as_deref(), signature) { + RateCapHaltDecision::Skip => { + info!( + "[subconscious] halted — the Subconscious provider keeps hitting a permanent \ + per-minute token cap (413/TPM); skipping tick until the model/tier changes \ + (TAURI-RUST-HXF)" + ); + self.total_ticks += 1; + true + } + RateCapHaltDecision::Resume => { + info!( + "[subconscious] Subconscious provider config changed — clearing rate-cap halt \ + and resuming ticks" + ); + self.rate_cap_halt_signature = None; + if self.provider_unavailable_reason.as_deref() == Some(RATE_CAP_HALT_REASON) { + self.provider_unavailable_reason = None; + } + false + } + RateCapHaltDecision::Proceed => false, + } + } + + /// Arm the rate-cap halt after a tick failed with a permanent per-minute + /// token-cap rejection, so subsequent ticks skip until the provider + /// signature changes. 
Surfaces an actionable reason in + /// [`SubconsciousStatus`]. TAURI-RUST-HXF. + fn arm_rate_cap_halt(&mut self, signature: &str) { + info!( + "[subconscious] provider rejected the tick with a permanent per-minute token cap \ + (413/TPM) — halting until the Subconscious model/tier changes (TAURI-RUST-HXF)" + ); + self.rate_cap_halt_signature = Some(signature.to_string()); + self.provider_unavailable_reason = Some(RATE_CAP_HALT_REASON.to_string()); + } } impl SubconsciousEngine { @@ -142,6 +206,7 @@ impl SubconsciousEngine { total_ticks: 0, consecutive_failures: 0, provider_unavailable_reason: None, + rate_cap_halt_signature: None, }), tick_generation: AtomicU64::new(0), tick_lock: Mutex::new(()), @@ -233,6 +298,20 @@ impl SubconsciousEngine { } }; + let provider_signature = subconscious_provider_signature(&config); + if self + .state + .lock() + .await + .should_skip_for_rate_cap_halt(&provider_signature) + { + return Ok(TickResult { + tick_at, + duration_ms: started.elapsed().as_millis() as u64, + response_chars: 0, + }); + } + if let Some(reason) = subconscious_provider_unavailable_reason(&config) { info!("[subconscious] provider unavailable, skipping tick: {reason}"); let mut state = self.state.lock().await; @@ -362,6 +441,8 @@ impl SubconsciousEngine { "[subconscious] configured chat model has no tool-use endpoint — Subconscious can't run until the model changes (TAURI-RUST-ADC)" ); state.provider_unavailable_reason = Some(TOOL_UNSUPPORTED_REASON.to_string()); + } else if is_permanent_rate_cap_error(e) { + state.arm_rate_cap_halt(&provider_signature); } } } else { @@ -687,6 +768,51 @@ fn resolve_subconscious_route(config: &Config) -> SubconsciousProviderRoute { } } +/// Stable identity of the Subconscious provider routing — the exact knobs a +/// user changes in Settings > AI > Advanced to switch the tick model/provider. 
+/// The rate-cap circuit breaker keys its halt on this so a permanent per-minute +/// token-cap rejection stops re-firing while the SAME config is set, and +/// auto-clears the moment the user picks a different model/provider/tier. +fn subconscious_provider_signature(config: &Config) -> String { + match resolve_subconscious_route(config) { + SubconsciousProviderRoute::LocalOllama { model } => format!("local:{model}"), + SubconsciousProviderRoute::OpenHumanCloud => "cloud".to_string(), + SubconsciousProviderRoute::Other(raw) => format!("other:{raw}"), + } +} + +/// Outcome of comparing an active rate-cap halt against the live provider +/// signature at the start of a tick. Pure so it is unit-testable without +/// spinning an engine/agent. +#[derive(Debug, PartialEq, Eq)] +enum RateCapHaltDecision { + /// A halt is set for the same signature still in config — skip the run. + Skip, + /// A halt is set but the signature changed — clear it and resume ticking. + Resume, + /// No halt in effect — run the tick normally. + Proceed, +} + +/// Decide whether a tick should skip, resume, or proceed given the stored +/// rate-cap halt signature (if any) and the live provider signature. +fn evaluate_rate_cap_halt(halt_signature: Option<&str>, current: &str) -> RateCapHaltDecision { + match halt_signature { + Some(sig) if sig == current => RateCapHaltDecision::Skip, + Some(_) => RateCapHaltDecision::Resume, + None => RateCapHaltDecision::Proceed, + } +} + +/// True when an agent-run error is a permanent per-minute token-cap rejection +/// (413/TPM) — the request is larger than the provider account's per-minute +/// budget, so retrying the same tick can never succeed. Delegates to the shared +/// provider matcher (single source of truth with the Sentry classifier in +/// `core::observability`) so the wording can't drift. TAURI-RUST-HXF. 
+fn is_permanent_rate_cap_error(msg: &str) -> bool { + crate::openhuman::inference::provider::is_provider_rate_cap_exceeded_message(msg) +} + /// True when an agent-run error means the configured chat model can't do tool /// calls at all — a permanent, user-actionable condition (pick a tool-capable /// model). Matches both the direct-provider body (` does not support diff --git a/src/openhuman/subconscious/engine_tests.rs b/src/openhuman/subconscious/engine_tests.rs index 2d7f978df4..62588132ab 100644 --- a/src/openhuman/subconscious/engine_tests.rs +++ b/src/openhuman/subconscious/engine_tests.rs @@ -152,3 +152,92 @@ fn render_world_diff_caps_items_and_falls_back_to_item_id() { assert!(rendered.contains("[added] item_0"), "uses item_id fallback"); assert!(rendered.contains("…and 3 more"), "caps the per-source list"); } + +// ── Rate-cap circuit breaker (TAURI-RUST-HXF) ─────────────────────────── + +#[test] +fn evaluate_rate_cap_halt_skip_resume_proceed() { + // No halt in effect → run normally. + assert_eq!( + evaluate_rate_cap_halt(None, "other:groq"), + RateCapHaltDecision::Proceed + ); + // Halt set for the same signature still in config → skip the doomed run. + assert_eq!( + evaluate_rate_cap_halt(Some("other:groq"), "other:groq"), + RateCapHaltDecision::Skip + ); + // Halt set but the user switched provider/model → clear it and resume. + assert_eq!( + evaluate_rate_cap_halt(Some("other:groq"), "cloud"), + RateCapHaltDecision::Resume + ); +} + +#[test] +fn permanent_rate_cap_error_matches_wrapped_groq_agent_error_only() { + // The verbatim wrapped agent-run error the tick surfaces (413/TPM) → + // permanent, so the breaker halts. 
+ assert!(is_permanent_rate_cap_error( + r#"agent run: groq API error (413 Payload Too Large): {"error":{"message":"Request too large for model `openai/gpt-oss-120b` in organization `org_x` service tier `on_demand` on tokens per minute (TPM): Limit 8000, Requested 42084."}}"# + )); + // A transient 429 burst ("try again in Ns") must NOT halt — it stays + // retryable, so the two permanent-error arms never overlap. + assert!(!is_permanent_rate_cap_error( + "agent run: groq API error (429 Too Many Requests): Rate limit reached. Please try again in 2.5s." + )); + // A tool-capability error is a different permanent condition handled by its + // own arm, not the rate-cap breaker. + assert!(!is_permanent_rate_cap_error( + "agent run: No endpoints found that support tool use" + )); +} + +#[test] +fn subconscious_provider_signature_tracks_config_changes() { + // Default config routes to OpenHuman cloud. + let mut cfg = Config::default(); + assert_eq!(subconscious_provider_signature(&cfg), "cloud"); + + // A BYO provider override yields a distinct, stable signature. + cfg.subconscious_provider = Some("groq".to_string()); + let groq_sig = subconscious_provider_signature(&cfg); + assert_eq!(groq_sig, "other:groq"); + + // Switching the provider changes the signature — the breaker's cue to + // clear a halt and resume ticking. + cfg.subconscious_provider = Some("openai".to_string()); + assert_ne!(subconscious_provider_signature(&cfg), groq_sig); +} + +#[test] +fn rate_cap_halt_state_transitions() { + let mut state = EngineState { + last_tick_at: 0.0, + total_ticks: 0, + consecutive_failures: 0, + provider_unavailable_reason: None, + rate_cap_halt_signature: None, + }; + + // No halt armed → the tick proceeds (does not skip). + assert!(!state.should_skip_for_rate_cap_halt("other:groq")); + + // A permanent rate-cap failure arms the halt + actionable reason. 
+ state.arm_rate_cap_halt("other:groq"); + assert_eq!(state.rate_cap_halt_signature.as_deref(), Some("other:groq")); + assert_eq!( + state.provider_unavailable_reason.as_deref(), + Some(RATE_CAP_HALT_REASON) + ); + + // Same config still set → skip the doomed run, and count the skipped tick. + let before = state.total_ticks; + assert!(state.should_skip_for_rate_cap_halt("other:groq")); + assert_eq!(state.total_ticks, before + 1); + + // User switched provider (signature changed) → clear halt + reason, resume. + assert!(!state.should_skip_for_rate_cap_halt("cloud")); + assert!(state.rate_cap_halt_signature.is_none()); + assert!(state.provider_unavailable_reason.is_none()); +}