From 257a77d5fe24e3eb46d3109abe870e411ec810aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E5=86=A0=E8=BE=B0?= Date: Wed, 1 Jul 2026 02:06:06 +0800 Subject: [PATCH 1/4] fix(agent): list available tools for unknown calls --- .../agent/harness/engine/tool_source.rs | 5 ++ src/openhuman/agent/harness/engine/tools.rs | 33 ++++++++- .../agent/harness/session/agent_tool_exec.rs | 68 ++++++++++++++++++- .../subagent_runner/ops/tool_source.rs | 20 ++++-- .../agent/harness/test_support_tests.rs | 47 +++++++++++++ 5 files changed, 164 insertions(+), 9 deletions(-) diff --git a/src/openhuman/agent/harness/engine/tool_source.rs b/src/openhuman/agent/harness/engine/tool_source.rs index 5e84a347c6..3afbe41ec8 100644 --- a/src/openhuman/agent/harness/engine/tool_source.rs +++ b/src/openhuman/agent/harness/engine/tool_source.rs @@ -23,6 +23,7 @@ use async_trait::async_trait; use super::super::payload_summarizer::PayloadSummarizer; use super::progress::ProgressReporter; +use super::tools::sorted_tool_names; use super::{run_one_tool, ToolRunResult}; use crate::openhuman::agent::harness::parse::ParsedToolCall; use crate::openhuman::agent_tool_policy::ToolPolicySession; @@ -76,6 +77,7 @@ pub(crate) struct RegistryToolSource<'a> { tool_policy: &'a dyn ToolPolicy, payload_summarizer: Option<&'a dyn PayloadSummarizer>, specs: Vec, + available_tool_names: Vec, } impl<'a> RegistryToolSource<'a> { @@ -98,6 +100,7 @@ impl<'a> RegistryToolSource<'a> { .map(|tool| tool.spec()) .collect(); let specs = crate::openhuman::agent::harness::session::dedup_visible_tool_specs(filtered); + let available_tool_names = sorted_tool_names(specs.iter().map(|spec| spec.name.as_str())); Self { registry, extra, @@ -105,6 +108,7 @@ impl<'a> RegistryToolSource<'a> { tool_policy, payload_summarizer, specs, + available_tool_names, } } @@ -143,6 +147,7 @@ impl ToolSource for RegistryToolSource<'_> { self.tool_policy, self.payload_summarizer, progress_call_id, + &self.available_tool_names, crate::openhuman::tokenjuice::AgentTokenjuiceCompression::Full, ) .await diff --git a/src/openhuman/agent/harness/engine/tools.rs b/src/openhuman/agent/harness/engine/tools.rs index e11bd727a8..7e1ca30e87 100644 --- a/src/openhuman/agent/harness/engine/tools.rs +++ b/src/openhuman/agent/harness/engine/tools.rs @@ -47,6 +47,35 @@ pub(crate) struct ToolRunResult { /// child process, whereas the harness backstop merely drops the future. const TOOL_TIMEOUT_GRACE_SECS: u64 = 5; +pub(crate) fn sorted_tool_names<'a>(names: impl IntoIterator) -> Vec { + let mut names: Vec = names + .into_iter() + .filter(|name| !name.trim().is_empty()) + .map(ToOwned::to_owned) + .collect(); + names.sort_unstable(); + names.dedup(); + names +} + +pub(crate) fn format_available_tools_hint(available_tool_names: &[String]) -> String { + if available_tool_names.is_empty() { + "No tools are available in this turn.".to_string() + } else { + format!("Available tools: {}", available_tool_names.join(", ")) + } +} + +pub(crate) fn format_unknown_tool_message( + tool_name: &str, + available_tool_names: &[String], +) -> String { + format!( + "Unknown tool: {tool_name}. {}", + format_available_tools_hint(available_tool_names) + ) +} + /// Map a [`ToolTimeout`] policy to `(deadline, effective_secs)` for /// [`run_one_tool`]. `deadline` is `None` for an unbounded run (no harness /// timeout); `effective_secs` is the value surfaced in the timeout message @@ -90,6 +119,7 @@ pub(crate) async fn run_one_tool( tool_policy: &dyn ToolPolicy, payload_summarizer: Option<&dyn PayloadSummarizer>, progress_call_id: &str, + available_tool_names: &[String], tokenjuice_compression: crate::openhuman::tokenjuice::AgentTokenjuiceCompression, ) -> ToolRunResult { let iteration_u32 = (iteration + 1) as u32; @@ -162,9 +192,10 @@ pub(crate) async fn run_one_tool( tracing::warn!( iteration, tool = call.name.as_str(), + available_tool_count = available_tool_names.len(), "[agent_loop] unknown tool requested" ); - let msg = format!("Unknown tool: {}", call.name); + let msg = format_unknown_tool_message(&call.name, available_tool_names); emit_failed_completion(&msg).await; return ToolRunResult { text: msg, diff --git a/src/openhuman/agent/harness/session/agent_tool_exec.rs b/src/openhuman/agent/harness/session/agent_tool_exec.rs index 305c8dcb95..56a2d1f710 100644 --- a/src/openhuman/agent/harness/session/agent_tool_exec.rs +++ b/src/openhuman/agent/harness/session/agent_tool_exec.rs @@ -12,6 +12,9 @@ use std::collections::HashSet; use crate::core::event_bus::{publish_global, DomainEvent}; use crate::openhuman::agent::dispatcher::{ParsedToolCall, ToolExecutionResult}; +use crate::openhuman::agent::harness::engine::tools::{ + format_available_tools_hint, format_unknown_tool_message, sorted_tool_names, +}; use crate::openhuman::agent::harness::engine::ProgressReporter; use crate::openhuman::agent::harness::payload_summarizer::PayloadSummarizer; use crate::openhuman::agent::harness::tool_result_artifacts::{ @@ -46,6 +49,14 @@ pub(super) struct AgentToolExecCtx<'a> { pub artifact_store: Option<&'a ToolResultArtifactStore>, } +fn available_tool_names_for_ctx(ctx: &AgentToolExecCtx<'_>) -> Vec { + if ctx.visible_tool_names.is_empty() { + sorted_tool_names(ctx.tools.iter().map(|tool| tool.name())) + } else { + sorted_tool_names(ctx.visible_tool_names.iter().map(|name| name.as_str())) + } +} + /// Execute one parsed tool call end-to-end with the Agent's semantics, emitting /// `ToolCallStarted` / `ToolCallCompleted` through `progress`. Returns the /// result (for history formatting) + the call record (for post-turn hooks). @@ -98,8 +109,13 @@ pub(super) async fn run_agent_tool_call( "[agent] blocked tool call '{}' — not in visible tool set", call.name ); + let available = available_tool_names_for_ctx(ctx); ( - format!("Tool '{}' is not available to this agent", call.name), + format!( + "Tool '{}' is not available to this agent. {}", + call.name, + format_available_tools_hint(&available) + ), false, ) } else if let Some(tool) = ctx.tools.iter().find(|t| t.name() == call.name) { @@ -300,7 +316,8 @@ pub(super) async fn run_agent_tool_call( } } } else { - (format!("Unknown tool: {}", call.name), false) + let available = available_tool_names_for_ctx(ctx); + (format_unknown_tool_message(&call.name, &available), false) }; // Stage 1a — content-aware compaction via the TokenJuice content router. @@ -452,6 +469,53 @@ mod tests { } } + #[tokio::test(flavor = "current_thread")] + async fn session_tool_executor_unknown_tool_lists_available_tools() { + let tools: Vec> = vec![Box::new(HangingTool)]; + let visible_tool_names = HashSet::new(); + let policy_session = ToolPolicyEngine::build_session( + "context_scout", + "web", + "test", + &HashMap::new(), + &tools, + &visible_tool_names, + ); + let tool_policy = AllowAllToolPolicy; + let ctx = AgentToolExecCtx { + tools: &tools, + visible_tool_names: &visible_tool_names, + tool_policy_session: &policy_session, + tool_policy: &tool_policy, + payload_summarizer: None, + event_session_id: "session-1", + event_channel: "web", + agent_definition_id: "context_scout", + prefer_markdown: false, + budget_bytes: 4096, + compaction_enabled: false, + tokenjuice_compression: crate::openhuman::tokenjuice::AgentTokenjuiceCompression::Off, + artifact_store: None, + }; + let call = ParsedToolCall { + name: "search_files".to_string(), + arguments: json!({}), + tool_call_id: Some("call-unknown".to_string()), + }; + let progress = TestProgress { + completed: AtomicUsize::new(0), + timeout_completions: AtomicUsize::new(0), + }; + + let (result, record) = run_agent_tool_call(&ctx, &progress, &call, 0).await; + + assert!(!result.success); + assert!(result.output.contains("Unknown tool: search_files")); + assert!(result.output.contains("Available tools: memory_tree")); + assert!(!record.success); + assert_eq!(progress.completed.load(Ordering::Relaxed), 1); + } + #[tokio::test(flavor = "current_thread")] async fn session_tool_executor_enforces_tool_timeout_policy() { let tools: Vec> = vec![Box::new(HangingTool)]; diff --git a/src/openhuman/agent/harness/subagent_runner/ops/tool_source.rs b/src/openhuman/agent/harness/subagent_runner/ops/tool_source.rs index cce0fc5545..e69d4c0105 100644 --- a/src/openhuman/agent/harness/subagent_runner/ops/tool_source.rs +++ b/src/openhuman/agent/harness/subagent_runner/ops/tool_source.rs @@ -9,6 +9,7 @@ use std::collections::HashSet; +use crate::openhuman::agent::harness::engine::tools::sorted_tool_names; use crate::openhuman::tools::{Tool, ToolSpec}; use super::handoff_helper::apply_handoff; @@ -33,6 +34,16 @@ pub(super) struct SubagentToolSource<'a> { pub(super) tokenjuice_compression: crate::openhuman::tokenjuice::AgentTokenjuiceCompression, } +impl SubagentToolSource<'_> { + fn available_tool_names(&self) -> Vec { + let mut available: Vec<&str> = self.allowed_names.iter().map(|s| s.as_str()).collect(); + if let Some(resolver) = self.lazy_resolver.as_ref() { + available.extend(resolver.known_slugs()); + } + sorted_tool_names(available) + } +} + #[async_trait::async_trait] impl super::super::super::engine::ToolSource for SubagentToolSource<'_> { fn request_specs(&self) -> &[ToolSpec] { @@ -84,12 +95,7 @@ impl super::super::super::engine::ToolSource for SubagentToolSource<'_> { None, ) .await; - let mut available: Vec<&str> = self.allowed_names.iter().map(|s| s.as_str()).collect(); - if let Some(resolver) = self.lazy_resolver.as_ref() { - available.extend(resolver.known_slugs()); - } - available.sort_unstable(); - available.dedup(); + let available = self.available_tool_names(); let text = format!( "Error: tool '{}' is not available to the {} sub-agent. Available tools: {}", call.name, @@ -111,6 +117,7 @@ impl super::super::super::engine::ToolSource for SubagentToolSource<'_> { .find(|t| t.name() == call.name) .or_else(|| self.parent_tools.iter().find(|t| t.name() == call.name)) .map(|b| b.as_ref()); + let available_tool_names = self.available_tool_names(); let outcome = super::super::super::engine::run_one_tool( tool_opt, call, @@ -119,6 +126,7 @@ impl super::super::super::engine::ToolSource for SubagentToolSource<'_> { &self.policy, None, progress_call_id, + &available_tool_names, self.tokenjuice_compression, ) .await; diff --git a/src/openhuman/agent/harness/test_support_tests.rs b/src/openhuman/agent/harness/test_support_tests.rs index 04a2278e0f..3d38b4a3c5 100644 --- a/src/openhuman/agent/harness/test_support_tests.rs +++ b/src/openhuman/agent/harness/test_support_tests.rs @@ -942,6 +942,53 @@ async fn keyword_provider_unknown_tool_surfaces_error_and_loop_continues() { assert!(history.iter().any(|m| m.content.contains("Unknown tool"))); } +#[tokio::test] +async fn keyword_provider_unknown_tool_lists_available_tools() { + let provider = KeywordScriptedProvider::new(vec![ + KeywordRule::tool_call("go", ScriptedToolCall::new("search_files", json!({}))), + KeywordRule::final_reply("Available tools: echo, lookup", "Corrected."), + ]); + + let (echo_tool, echo_calls) = RecordingTool::echo("echo"); + let (lookup_tool, lookup_calls) = RecordingTool::echo("lookup"); + let tools: Vec> = vec![Box::new(echo_tool), Box::new(lookup_tool)]; + + let mut history = vec![ChatMessage::user("go please")]; + + let out = run_tool_call_loop( + &provider, + &mut history, + &tools, + "mock", + "test-model", + 0.0, + true, + "channel", + &mm(), + &mff(), + 5, + None, + None, + &[], + None, + None, + &crate::openhuman::tools::policy::DefaultToolPolicy, + ) + .await + .unwrap(); + + assert_eq!(out, "Corrected."); + assert!(echo_calls.lock().is_empty()); + assert!(lookup_calls.lock().is_empty()); + let joined = history + .iter() + .map(|message| message.content.as_str()) + .collect::>() + .join("\n"); + assert!(joined.contains("Unknown tool: search_files")); + assert!(joined.contains("Available tools: echo, lookup")); +} + // ── 5. Max iterations guard ─────────────────────────────────────── #[tokio::test] From 0e5a191f0cd79937032ba4ebd77e1687c541e279 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E5=86=A0=E8=BE=B0?= Date: Wed, 1 Jul 2026 02:40:27 +0800 Subject: [PATCH 2/4] fix(agent): filter unknown tool hints by policy --- .../agent/harness/session/agent_tool_exec.rs | 116 +++++++++++++++++- 1 file changed, 112 insertions(+), 4 deletions(-) diff --git a/src/openhuman/agent/harness/session/agent_tool_exec.rs b/src/openhuman/agent/harness/session/agent_tool_exec.rs index 56a2d1f710..f182f07be8 100644 --- a/src/openhuman/agent/harness/session/agent_tool_exec.rs +++ b/src/openhuman/agent/harness/session/agent_tool_exec.rs @@ -50,11 +50,30 @@ pub(super) struct AgentToolExecCtx<'a> { } fn available_tool_names_for_ctx(ctx: &AgentToolExecCtx<'_>) -> Vec { - if ctx.visible_tool_names.is_empty() { - sorted_tool_names(ctx.tools.iter().map(|tool| tool.name())) - } else { - sorted_tool_names(ctx.visible_tool_names.iter().map(|name| name.as_str())) + let mut names = Vec::new(); + let mut filtered_out = Vec::new(); + for tool in ctx.tools { + let name = tool.name(); + let visible_by_scope = + ctx.visible_tool_names.is_empty() || ctx.visible_tool_names.contains(name); + let allowed_by_policy = ctx.tool_policy_session.is_allowed(name); + + if visible_by_scope && allowed_by_policy { + names.push(name); + } else if visible_by_scope { + filtered_out.push(name.to_string()); + } } + + if !filtered_out.is_empty() { + log::debug!( + "[agent] filtered unavailable tools from unknown-tool hint channel={} tools={:?}", + ctx.event_channel, + filtered_out + ); + } + + sorted_tool_names(names.into_iter()) } /// Execute one parsed tool call end-to-end with the Agent's semantics, emitting @@ -415,6 +434,7 @@ mod tests { use crate::openhuman::agent::tool_policy::AllowAllToolPolicy; use crate::openhuman::agent_tool_policy::ToolPolicyEngine; use crate::openhuman::tools::traits::{ToolResult, ToolTimeout}; + use crate::openhuman::tools::PermissionLevel; use async_trait::async_trait; use serde_json::json; use std::collections::{HashMap, HashSet}; @@ -422,6 +442,11 @@ mod tests { use std::time::Duration; struct HangingTool; + struct PermissionedTool { + name: &'static str, + permission: PermissionLevel, + } + struct TestProgress { completed: AtomicUsize, timeout_completions: AtomicUsize, @@ -469,6 +494,29 @@ mod tests { } } + #[async_trait] + impl Tool for PermissionedTool { + fn name(&self) -> &str { + self.name + } + + fn description(&self) -> &str { + "test permissioned tool" + } + + fn parameters_schema(&self) -> serde_json::Value { + json!({ "type": "object", "properties": {} }) + } + + async fn execute(&self, _args: serde_json::Value) -> anyhow::Result { + Ok(ToolResult::success("ok")) + } + + fn permission_level(&self) -> PermissionLevel { + self.permission + } + } + #[tokio::test(flavor = "current_thread")] async fn session_tool_executor_unknown_tool_lists_available_tools() { let tools: Vec> = vec![Box::new(HangingTool)]; @@ -516,6 +564,66 @@ mod tests { assert_eq!(progress.completed.load(Ordering::Relaxed), 1); } + #[tokio::test(flavor = "current_thread")] + async fn session_tool_executor_unknown_tool_hint_uses_policy_filtered_tools() { + let tools: Vec> = vec![ + Box::new(PermissionedTool { + name: "read_notes", + permission: PermissionLevel::ReadOnly, + }), + Box::new(PermissionedTool { + name: "write_notes", + permission: PermissionLevel::Write, + }), + ]; + let visible_tool_names = HashSet::new(); + let channel_permissions = HashMap::from([("web".to_string(), "readonly".to_string())]); + let policy_session = ToolPolicyEngine::build_session( + "context_scout", + "web", + "test", + &channel_permissions, + &tools, + &visible_tool_names, + ); + let tool_policy = AllowAllToolPolicy; + let ctx = AgentToolExecCtx { + tools: &tools, + visible_tool_names: &visible_tool_names, + tool_policy_session: &policy_session, + tool_policy: &tool_policy, + payload_summarizer: None, + event_session_id: "session-1", + event_channel: "web", + agent_definition_id: "context_scout", + prefer_markdown: false, + budget_bytes: 4096, + compaction_enabled: false, + tokenjuice_compression: crate::openhuman::tokenjuice::AgentTokenjuiceCompression::Off, + artifact_store: None, + }; + let call = ParsedToolCall { + name: "missing_tool".to_string(), + arguments: json!({}), + tool_call_id: Some("call-unknown".to_string()), + }; + let progress = TestProgress { + completed: AtomicUsize::new(0), + timeout_completions: AtomicUsize::new(0), + }; + + let (result, _record) = run_agent_tool_call(&ctx, &progress, &call, 0).await; + + assert!(!result.success); + assert!(result.output.contains("Unknown tool: missing_tool")); + assert!(result.output.contains("Available tools: read_notes")); + assert!( + !result.output.contains("write_notes"), + "unknown-tool hints must not advertise policy-denied tools: {}", + result.output + ); + } + #[tokio::test(flavor = "current_thread")] async fn session_tool_executor_enforces_tool_timeout_policy() { let tools: Vec> = vec![Box::new(HangingTool)]; From f44006704a38c461b5323e661bc13a0766c08448 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E5=86=A0=E8=BE=B0?= Date: Wed, 1 Jul 2026 08:32:28 +0800 Subject: [PATCH 3/4] test(agent): keep raw coverage assertions budget-stable --- tests/agent_session_turn_raw_coverage_e2e.rs | 32 +++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/tests/agent_session_turn_raw_coverage_e2e.rs b/tests/agent_session_turn_raw_coverage_e2e.rs index 7eca68a1b3..3d55c27505 100644 --- a/tests/agent_session_turn_raw_coverage_e2e.rs +++ b/tests/agent_session_turn_raw_coverage_e2e.rs @@ -821,6 +821,22 @@ async fn turn_xml_failures_checkpoint_policy_visibility_and_hooks_are_publicly_e let hooks = hook_calls.lock().await; assert_eq!(hooks[0].assistant_response, checkpoint); assert_eq!(hooks[0].tool_calls.len(), 6); + let tool_names = hooks[0] + .tool_calls + .iter() + .map(|record| record.name.as_str()) + .collect::>(); + assert_eq!( + tool_names, + vec![ + "hidden_tool", + "cli_only", + "round17_error", + "round17_boom", + "round17_write", + "round17_ok", + ] + ); let joined = provider .requests() @@ -829,7 +845,21 @@ async fn turn_xml_failures_checkpoint_policy_visibility_and_hooks_are_publicly_e .map(|message| message.content) .collect::>() .join("\n"); - assert!(joined.contains("not available to this agent")); + assert!(joined.contains("")); + let allowed_tool_lines = joined + .lines() + .filter(|line| line.starts_with("- Allowed tools:")) + .collect::>(); + assert!( + !allowed_tool_lines.is_empty(), + "expected policy boundary allowed-tools line in provider context" + ); + assert!( + allowed_tool_lines + .iter() + .all(|line| !line.contains("round17_write")), + "write tool should not be advertised as policy-allowed: {allowed_tool_lines:?}" + ); assert!(joined.contains("semantic failure")); assert!(joined.contains("Error executing round17_boom")); assert!(joined.contains("denied by policy 'round17-deny'")); From 38787103fb2e7ffea97f99de5d57bd656b79b594 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=9D=8E=E5=86=A0=E8=BE=B0?= Date: Thu, 2 Jul 2026 17:06:33 +0800 Subject: [PATCH 4/4] chore(ci): retry flaky checks