From 80a03a813303f51f66e22d8c730f0fa24544b8bc Mon Sep 17 00:00:00 2001 From: Dinindu Suriyamudali Date: Wed, 21 Jan 2026 23:08:22 +1300 Subject: [PATCH 1/5] fix(openai): Handles Bedrock-style context overflow errors Adds logic to recognize and handle context window overflow errors that are returned in a Bedrock-style format by OpenAI-compatible endpoints that wrap Bedrock models. Handles OpenAI rate limit errors consistently Ensures rate limit errors from OpenAI are consistently treated as throttling exceptions across different code paths. This prevents potential misinterpretation of rate limits as context window overflows. Fixes Bedrock-style overflow detection when rate limited. --- src/strands/models/openai.py | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/strands/models/openai.py b/src/strands/models/openai.py index d9266212b..bb1fe1f22 100644 --- a/src/strands/models/openai.py +++ b/src/strands/models/openai.py @@ -27,6 +27,14 @@ T = TypeVar("T", bound=BaseModel) +# Bedrock-style error messages that indicate context overflow +# (for OpenAI-compatible endpoints that wrap Bedrock models) +BEDROCK_STYLE_OVERFLOW_MESSAGES = [ + "Input is too long for requested model", + "input length and `max_tokens` exceed context limit", + "too many total text bytes", +] + class Client(Protocol): """Protocol defining the OpenAI-compatible interface for the underlying provider client.""" @@ -594,6 +602,14 @@ async def stream( # Rate limits (including TPM) require waiting/retrying, not context reduction logger.warning("OpenAI threw rate limit error") raise ModelThrottledException(str(e)) from e + except openai.APIError as e: + # Check for Bedrock-style error messages (for OpenAI-compatible endpoints wrapping Bedrock) + error_message = str(e) + if any(overflow_msg in error_message for overflow_msg in BEDROCK_STYLE_OVERFLOW_MESSAGES): + logger.warning("OpenAI endpoint threw Bedrock-style context window overflow error") + raise ContextWindowOverflowException(error_message) from e + # Re-raise other APIError exceptions + raise logger.debug("got response from model") yield self.format_chunk({"chunk_type": "message_start"}) @@ -717,6 +733,14 @@ async def structured_output( # Rate limits (including TPM) require waiting/retrying, not context reduction logger.warning("OpenAI threw rate limit error") raise ModelThrottledException(str(e)) from e + except openai.APIError as e: + # Check for Bedrock-style error messages (for OpenAI-compatible endpoints wrapping Bedrock) + error_message = str(e) + if any(overflow_msg in error_message for overflow_msg in BEDROCK_STYLE_OVERFLOW_MESSAGES): + logger.warning("OpenAI endpoint threw Bedrock-style context window overflow error") + raise ContextWindowOverflowException(error_message) from e + # Re-raise other APIError exceptions + raise parsed: T | None = None # Find the first choice with tool_calls From 1221917d956a080667e64f370c962005516d105f Mon Sep 17 00:00:00 2001 From: Dinindu Suriyamudali Date: Thu, 22 Jan 2026 10:12:35 +1300 Subject: [PATCH 2/5] Generalises context overflow error handling Updates the model to recognize a wider range of context overflow error messages. This change expands error handling to include messages from various providers, ensuring more robust detection of context window overflows when using OpenAI-compatible endpoints. 
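For illustration, the detection reduces to a substring match against the
known messages. A minimal sketch — the list contents are taken from this
diff, but the standalone helper name is hypothetical; the actual change
inlines the check in the except blocks:

    CONTEXT_OVERFLOW_MESSAGES = [
        "Input is too long for requested model",
        "input length and `max_tokens` exceed context limit",
        "too many total text bytes",
    ]

    def _is_context_overflow(error_message: str) -> bool:
        # Case-sensitive substring match against known provider messages.
        return any(msg in error_message for msg in CONTEXT_OVERFLOW_MESSAGES)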
--- src/strands/models/openai.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/strands/models/openai.py b/src/strands/models/openai.py index bb1fe1f22..b793ffca2 100644 --- a/src/strands/models/openai.py +++ b/src/strands/models/openai.py @@ -27,9 +27,9 @@ T = TypeVar("T", bound=BaseModel) -# Bedrock-style error messages that indicate context overflow -# (for OpenAI-compatible endpoints that wrap Bedrock models) -BEDROCK_STYLE_OVERFLOW_MESSAGES = [ +# Alternative context overflow error messages +# These are commonly returned by OpenAI-compatible endpoints wrapping other providers (e.g., Databricks serving Bedrock models) +CONTEXT_OVERFLOW_MESSAGES = [ "Input is too long for requested model", "input length and `max_tokens` exceed context limit", "too many total text bytes", @@ -603,10 +603,10 @@ async def stream( logger.warning("OpenAI threw rate limit error") raise ModelThrottledException(str(e)) from e except openai.APIError as e: - # Check for Bedrock-style error messages (for OpenAI-compatible endpoints wrapping Bedrock) + # Check for alternative context overflow error messages error_message = str(e) - if any(overflow_msg in error_message for overflow_msg in BEDROCK_STYLE_OVERFLOW_MESSAGES): - logger.warning("OpenAI endpoint threw Bedrock-style context window overflow error") + if any(overflow_msg in error_message for overflow_msg in CONTEXT_OVERFLOW_MESSAGES): + logger.warning("context window overflow error detected") raise ContextWindowOverflowException(error_message) from e # Re-raise other APIError exceptions raise @@ -734,10 +734,10 @@ async def structured_output( logger.warning("OpenAI threw rate limit error") raise ModelThrottledException(str(e)) from e except openai.APIError as e: - # Check for Bedrock-style error messages (for OpenAI-compatible endpoints wrapping Bedrock) + # Check for alternative context overflow error messages error_message = str(e) - if any(overflow_msg in error_message for overflow_msg in BEDROCK_STYLE_OVERFLOW_MESSAGES): - logger.warning("OpenAI endpoint threw Bedrock-style context window overflow error") + if any(overflow_msg in error_message for overflow_msg in CONTEXT_OVERFLOW_MESSAGES): + logger.warning("context window overflow error detected") raise ContextWindowOverflowException(error_message) from e # Re-raise other APIError exceptions raise From 45c30bdcd7098eccf65af35a706b5b4cfc68b294 Mon Sep 17 00:00:00 2001 From: Dinindu Suriyamudali Date: Thu, 22 Jan 2026 10:30:51 +1300 Subject: [PATCH 3/5] Added unit tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. test_stream_alternative_context_overflow_messages (3 parametrized tests) - Tests that stream() method properly converts APIError with alternative overflow messages to ContextWindowOverflowException - Parametrized to test all 3 error message patterns 2. test_structured_output_alternative_context_overflow_messages (3 parametrized tests) - Tests that structured_output() method properly converts APIError with alternative overflow messages - Parametrized to test all 3 error message patterns 3. 
test_stream_api_error_passthrough (1 test) - Tests that APIError without overflow messages passes through unchanged - Ensures we don't accidentally catch unrelated API errors Test Coverage The new tests verify: - ✅ All 3 alternative overflow messages are detected - ✅ Both stream() and structured_output() methods handle the errors - ✅ Errors are properly converted to ContextWindowOverflowException - ✅ Original exception is preserved as __cause__ - ✅ Non-overflow APIError exceptions pass through unchanged --- tests/strands/models/test_openai.py | 86 +++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/tests/strands/models/test_openai.py b/tests/strands/models/test_openai.py index 7c1d18998..849672af0 100644 --- a/tests/strands/models/test_openai.py +++ b/tests/strands/models/test_openai.py @@ -1034,6 +1034,92 @@ async def test_stream_context_overflow_exception(openai_client, model, messages) assert exc_info.value.__cause__ == mock_error +@pytest.mark.asyncio +@pytest.mark.parametrize( + "error_message", + [ + "Input is too long for requested model", + "input length and `max_tokens` exceed context limit", + "too many total text bytes", + ], +) +async def test_stream_alternative_context_overflow_messages(openai_client, model, messages, error_message): + """Test that alternative context overflow messages in APIError are properly converted.""" + # Create a mock OpenAI APIError with alternative context overflow message + mock_error = openai.APIError( + message=error_message, + request=unittest.mock.MagicMock(), + body={"error": {"message": error_message}}, + ) + + # Configure the mock client to raise the APIError + openai_client.chat.completions.create.side_effect = mock_error + + # Test that the stream method converts the error properly + with pytest.raises(ContextWindowOverflowException) as exc_info: + async for _ in model.stream(messages): + pass + + # Verify the exception message contains the original error + assert error_message in str(exc_info.value) + assert exc_info.value.__cause__ == mock_error + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "error_message", + [ + "Input is too long for requested model", + "input length and `max_tokens` exceed context limit", + "too many total text bytes", + ], +) +async def test_structured_output_alternative_context_overflow_messages( + openai_client, model, messages, test_output_model_cls, error_message +): + """Test that alternative context overflow messages in APIError are properly converted in structured output.""" + # Create a mock OpenAI APIError with alternative context overflow message + mock_error = openai.APIError( + message=error_message, + request=unittest.mock.MagicMock(), + body={"error": {"message": error_message}}, + ) + + # Configure the mock client to raise the APIError + openai_client.beta.chat.completions.parse.side_effect = mock_error + + # Test that the structured_output method converts the error properly + with pytest.raises(ContextWindowOverflowException) as exc_info: + async for _ in model.structured_output(test_output_model_cls, messages): + pass + + # Verify the exception message contains the original error + assert error_message in str(exc_info.value) + assert exc_info.value.__cause__ == mock_error + + +@pytest.mark.asyncio +async def test_stream_api_error_passthrough(openai_client, model, messages): + """Test that APIError without overflow messages passes through unchanged.""" + # Create a mock OpenAI APIError without overflow message + mock_error = openai.APIError( + message="Some other API 
error", + request=unittest.mock.MagicMock(), + body={"error": {"message": "Some other API error"}}, + ) + + # Configure the mock client to raise the APIError + openai_client.chat.completions.create.side_effect = mock_error + + # Test that APIError without overflow messages passes through + with pytest.raises(openai.APIError) as exc_info: + async for _ in model.stream(messages): + pass + + # Verify the original exception is raised, not ContextWindowOverflowException + assert exc_info.value == mock_error + + @pytest.mark.asyncio async def test_stream_other_bad_request_errors_passthrough(openai_client, model, messages): """Test that other BadRequestError exceptions are not converted to ContextWindowOverflowException.""" From 701f4662d7a54b3dd64e3dc1e6fa05fc9aaf342a Mon Sep 17 00:00:00 2001 From: Dinindu Suriyamudali Date: Thu, 29 Jan 2026 15:49:46 +1300 Subject: [PATCH 4/5] Fix linting These messages are often returned by OpenAI-compatible endpoints that wrap other providers. --- src/strands/models/openai.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/strands/models/openai.py b/src/strands/models/openai.py index b793ffca2..df55265e8 100644 --- a/src/strands/models/openai.py +++ b/src/strands/models/openai.py @@ -28,7 +28,8 @@ T = TypeVar("T", bound=BaseModel) # Alternative context overflow error messages -# These are commonly returned by OpenAI-compatible endpoints wrapping other providers (e.g., Databricks serving Bedrock models) +# These are commonly returned by OpenAI-compatible endpoints wrapping other providers +# (e.g., Databricks serving Bedrock models) CONTEXT_OVERFLOW_MESSAGES = [ "Input is too long for requested model", "input length and `max_tokens` exceed context limit", From 86ed9c88dbd167a7f28649754e5779504e275f38 Mon Sep 17 00:00:00 2001 From: Dinindu Suriyamudali Date: Tue, 3 Feb 2026 12:28:36 +1300 Subject: [PATCH 5/5] Mark context overflow messages as internal --- src/strands/models/openai.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/strands/models/openai.py b/src/strands/models/openai.py index df55265e8..88fd91003 100644 --- a/src/strands/models/openai.py +++ b/src/strands/models/openai.py @@ -30,7 +30,7 @@ # Alternative context overflow error messages # These are commonly returned by OpenAI-compatible endpoints wrapping other providers # (e.g., Databricks serving Bedrock models) -CONTEXT_OVERFLOW_MESSAGES = [ +_CONTEXT_OVERFLOW_MESSAGES = [ "Input is too long for requested model", "input length and `max_tokens` exceed context limit", "too many total text bytes", @@ -606,7 +606,7 @@ async def stream( except openai.APIError as e: # Check for alternative context overflow error messages error_message = str(e) - if any(overflow_msg in error_message for overflow_msg in CONTEXT_OVERFLOW_MESSAGES): + if any(overflow_msg in error_message for overflow_msg in _CONTEXT_OVERFLOW_MESSAGES): logger.warning("context window overflow error detected") raise ContextWindowOverflowException(error_message) from e # Re-raise other APIError exceptions @@ -737,7 +737,7 @@ async def structured_output( except openai.APIError as e: # Check for alternative context overflow error messages error_message = str(e) - if any(overflow_msg in error_message for overflow_msg in CONTEXT_OVERFLOW_MESSAGES): + if any(overflow_msg in error_message for overflow_msg in _CONTEXT_OVERFLOW_MESSAGES): logger.warning("context window overflow error detected") raise ContextWindowOverflowException(error_message) from e # Re-raise other APIError exceptions