Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions lib/crewai/src/crewai/agents/crew_agent_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from collections.abc import Callable
from concurrent.futures import ThreadPoolExecutor, as_completed
import contextvars
import copy
import inspect
import logging
from typing import TYPE_CHECKING, Any, Literal, cast
Expand Down Expand Up @@ -162,15 +163,23 @@ def __init__(
self.before_llm_call_hooks.extend(get_before_llm_call_hooks())
self.after_llm_call_hooks.extend(get_after_llm_call_hooks())
if self.llm:
# This may be mutating the shared llm object and needs further evaluation
existing_stop = getattr(self.llm, "stop", [])
self.llm.stop = list(
# Create a shallow copy of the LLM to avoid mutating the shared
# instance's stop words. When multiple agents share the same LLM,
# directly mutating stop words causes cross-agent state pollution
# where stop words accumulate across agents. (see #5141)
existing_stop = getattr(self.llm, "stop", []) or []
merged_stop = list(
set(
existing_stop + self.stop
if isinstance(existing_stop, list)
else self.stop
)
)
if merged_stop != (
existing_stop if isinstance(existing_stop, list) else []
):
self.llm = copy.copy(self.llm)
self.llm.stop = merged_stop

@property
def use_stop_words(self) -> bool:
Expand Down
14 changes: 12 additions & 2 deletions lib/crewai/src/crewai/experimental/agent_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from collections.abc import Callable, Coroutine
from concurrent.futures import ThreadPoolExecutor, as_completed
import contextvars
import copy
from datetime import datetime
import inspect
import json
Expand Down Expand Up @@ -256,14 +257,23 @@ def __init__(
self.after_llm_call_hooks.extend(get_after_llm_call_hooks())

if self.llm:
existing_stop = getattr(self.llm, "stop", [])
self.llm.stop = list(
# Create a shallow copy of the LLM to avoid mutating the shared
# instance's stop words. When multiple agents share the same LLM,
# directly mutating stop words causes cross-agent state pollution
# where stop words accumulate across agents. (see #5141)
existing_stop = getattr(self.llm, "stop", []) or []
merged_stop = list(
set(
existing_stop + self.stop
if isinstance(existing_stop, list)
else self.stop
)
)
if merged_stop != (
existing_stop if isinstance(existing_stop, list) else []
):
self.llm = copy.copy(self.llm)
self.llm.stop = merged_stop
self._state = AgentExecutorState()

# Plan-and-Execute components (Phase 2)
Expand Down
81 changes: 81 additions & 0 deletions lib/crewai/tests/agents/test_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -2421,3 +2421,84 @@ def test_agent_mcps_accepts_legacy_prefix_with_tool():
mcps=["crewai-amp:notion#get_page"],
)
assert agent.mcps == ["crewai-amp:notion#get_page"]


class TestCrewAgentExecutorSharedLLMStopWords:
    """Regression suite for issue #5141: stop-word mutation on a shared LLM.

    Constructing a CrewAgentExecutor must never write merged stop words back
    onto an LLM instance that other agents may also be using.
    """

    def _make_executor(self, llm, stop_words):
        """Build a CrewAgentExecutor wired with the minimum required deps."""
        from crewai.agents.tools_handler import ToolsHandler

        test_agent = Agent(
            role="test role",
            goal="test goal",
            backstory="test backstory",
        )
        test_task = Task(
            description="Test task",
            expected_output="Test output",
            agent=test_agent,
        )
        return CrewAgentExecutor(
            agent=test_agent,
            task=test_task,
            llm=llm,
            crew=None,
            prompt={"prompt": "Test {input} {tool_names} {tools}"},
            max_iter=5,
            tools=[],
            tools_names="",
            stop_words=stop_words,
            tools_description="",
            tools_handler=ToolsHandler(),
        )

    def test_shared_llm_not_mutated(self):
        """Executor construction leaves the shared LLM's stop words intact."""
        common_llm = LLM(model="gpt-4", stop=["Original:"])
        baseline = list(common_llm.stop)

        self._make_executor(common_llm, stop_words=["Observation:"])

        assert common_llm.stop == baseline

    def test_multiple_executors_isolate_stop_words(self):
        """Executors sharing one LLM each get their own isolated stop words."""
        common_llm = LLM(model="gpt-4", stop=["Original:"])
        baseline = list(common_llm.stop)

        first = self._make_executor(common_llm, stop_words=["StopA:"])
        second = self._make_executor(common_llm, stop_words=["StopB:"])

        # The original shared instance stays untouched.
        assert common_llm.stop == baseline

        # Every executor received a distinct LLM copy.
        assert first.llm is not common_llm
        assert second.llm is not common_llm
        assert first.llm is not second.llm

        # The first executor merged only its own words with the originals.
        assert "Original:" in first.llm.stop
        assert "StopA:" in first.llm.stop
        assert "StopB:" not in first.llm.stop

        # Likewise for the second executor.
        assert "Original:" in second.llm.stop
        assert "StopB:" in second.llm.stop
        assert "StopA:" not in second.llm.stop

    def test_stop_words_do_not_accumulate_across_kickoffs(self):
        """Repeated executor creation (multiple kickoffs) never grows the shared list."""
        common_llm = LLM(model="gpt-4", stop=["Original:"])
        baseline = list(common_llm.stop)

        for _ in range(5):
            self._make_executor(common_llm, stop_words=["Observation:"])

        assert common_llm.stop == baseline
147 changes: 147 additions & 0 deletions lib/crewai/tests/agents/test_agent_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2002,3 +2002,150 @@ def test_anthropic_provider_has_image_block_converter(self):
assert hasattr(AnthropicCompletion, "_convert_image_blocks"), (
"Anthropic provider must have _convert_image_blocks for auto-conversion"
)


class TestSharedLLMStopWordsMutation:
    """Regression suite for issue #5141 against the experimental AgentExecutor.

    Executors that share one LLM instance must never write merged stop words
    back onto it; each executor receives its own copy whenever new stop words
    are actually added.
    """

    @pytest.fixture
    def shared_llm(self):
        """A mock LLM, shared across executors, seeded with one stop word."""
        mock_llm = Mock()
        mock_llm.supports_stop_words.return_value = True
        mock_llm.stop = ["Original:"]
        return mock_llm

    @pytest.fixture
    def base_deps(self, shared_llm):
        """Keyword arguments common to every executor under test."""
        mock_task = Mock()
        mock_task.description = "Test task"

        mock_crew = Mock()
        mock_crew.verbose = False
        mock_crew._train = False

        return {
            "llm": shared_llm,
            "task": mock_task,
            "crew": mock_crew,
            "prompt": {"prompt": "Test {input}"},
            "max_iter": 10,
            "tools": [],
            "tools_names": "",
            "tools_description": "",
            "tools_handler": Mock(),
        }

    def test_shared_llm_not_mutated_by_executor(self, shared_llm, base_deps):
        """Constructing an executor leaves the shared LLM's stop words intact."""
        baseline = list(shared_llm.stop)

        mock_agent = Mock()
        mock_agent.id = "agent-1"
        mock_agent.role = "Agent 1"
        mock_agent.verbose = False
        mock_agent.key = "key-1"

        AgentExecutor(
            **base_deps,
            agent=mock_agent,
            stop_words=["Observation:"],
        )

        # The shared instance must keep exactly its original stop words.
        assert shared_llm.stop == baseline

    def test_multiple_executors_do_not_accumulate_stop_words(
        self, shared_llm, base_deps
    ):
        """Executors sharing one LLM must not leak stop words into each other."""
        baseline = list(shared_llm.stop)

        first_agent = Mock()
        first_agent.id = "agent-a"
        first_agent.role = "Agent A"
        first_agent.verbose = False
        first_agent.key = "key-a"

        second_agent = Mock()
        second_agent.id = "agent-b"
        second_agent.role = "Agent B"
        second_agent.verbose = False
        second_agent.key = "key-b"

        first = AgentExecutor(
            **base_deps,
            agent=first_agent,
            stop_words=["StopA:"],
        )

        second = AgentExecutor(
            **base_deps,
            agent=second_agent,
            stop_words=["StopB:"],
        )

        # The shared instance stays untouched.
        assert shared_llm.stop == baseline

        # Each executor works on its own LLM copy.
        assert first.llm is not shared_llm
        assert second.llm is not shared_llm
        assert first.llm is not second.llm

        # The first executor sees only Original: plus its own word.
        assert "Original:" in first.llm.stop
        assert "StopA:" in first.llm.stop
        assert "StopB:" not in first.llm.stop

        # Likewise for the second executor.
        assert "Original:" in second.llm.stop
        assert "StopB:" in second.llm.stop
        assert "StopA:" not in second.llm.stop

    def test_executor_no_copy_when_no_new_stop_words(self, shared_llm, base_deps):
        """With nothing new to merge, the executor keeps the shared LLM as-is."""
        # The shared LLM is already seeded with ["Original:"].
        mock_agent = Mock()
        mock_agent.id = "agent-1"
        mock_agent.role = "Agent 1"
        mock_agent.verbose = False
        mock_agent.key = "key-1"

        built = AgentExecutor(
            **base_deps,
            agent=mock_agent,
            stop_words=[],  # No new stop words
        )

        # No merge happened, so no defensive copy should have been made.
        assert built.llm is shared_llm

    def test_stop_words_persist_across_multiple_kickoffs(self, shared_llm, base_deps):
        """Repeated executor creation (simulating several crew.kickoff() calls)
        must never grow the shared LLM's stop-word list."""
        baseline = list(shared_llm.stop)

        for i in range(5):
            mock_agent = Mock()
            mock_agent.id = f"agent-{i}"
            mock_agent.role = f"Agent {i}"
            mock_agent.verbose = False
            mock_agent.key = f"key-{i}"

            AgentExecutor(
                **base_deps,
                agent=mock_agent,
                stop_words=["Observation:"],
            )

        # Five constructions later the shared stop words are still the originals.
        assert shared_llm.stop == baseline
Loading