diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/azure.json b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/azure.json
index 7363a41aab..f1ca8d2230 100644
--- a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/azure.json
+++ b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/azure.json
@@ -61,6 +61,14 @@
       "title": "Embedding Batch Size",
       "default": 5
     },
+    "max_retries": {
+      "type": "number",
+      "minimum": 0,
+      "multipleOf": 1,
+      "title": "Max Retries",
+      "default": 3,
+      "description": "The maximum number of times to retry a request if it fails."
+    },
     "timeout": {
       "type": "number",
       "minimum": 0,
diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/bedrock.json b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/bedrock.json
index b71028148c..81829a0f49 100644
--- a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/bedrock.json
+++ b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/bedrock.json
@@ -43,8 +43,8 @@
         "minimum": 0,
         "multipleOf": 1,
         "title": "Max Retries",
-        "default": 5,
-        "description": "Maximum number of retries to attempt when a request fails."
+        "default": 3,
+        "description": "The maximum number of times to retry a request if it fails."
       },
       "timeout": {
         "type": "number",
diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/ollama.json b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/ollama.json
index fe292f683c..8dd9bfa1c3 100644
--- a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/ollama.json
+++ b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/ollama.json
@@ -31,6 +31,14 @@
       "multipleOf": 1,
       "title": "Embed Batch Size",
       "default": 10
+    },
+    "max_retries": {
+      "type": "number",
+      "minimum": 0,
+      "multipleOf": 1,
+      "title": "Max Retries",
+      "default": 3,
+      "description": "The maximum number of times to retry a request if it fails."
     }
   }
 }
diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/openai.json b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/openai.json
index 9be724e41f..3ad21d3564 100644
--- a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/openai.json
+++ b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/openai.json
@@ -44,6 +44,14 @@
       "title": "Embed Batch Size",
       "default": 10
     },
+    "max_retries": {
+      "type": "number",
+      "minimum": 0,
+      "multipleOf": 1,
+      "title": "Max Retries",
+      "default": 3,
+      "description": "The maximum number of times to retry a request if it fails."
+    },
     "timeout": {
       "type": "number",
       "minimum": 0,
diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/vertexai.json b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/vertexai.json
index 1534fcf93d..6aa48e883f 100644
--- a/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/vertexai.json
+++ b/unstract/sdk1/src/unstract/sdk1/adapters/embedding1/static/vertexai.json
@@ -57,6 +57,14 @@
             "retrieval"
           ],
           "default": "default"
+      },
+      "max_retries": {
+        "type": "number",
+        "minimum": 0,
+        "multipleOf": 1,
+        "title": "Max Retries",
+        "default": 3,
+        "description": "The maximum number of times to retry a request if it fails."
       }
     }
   }
diff --git a/unstract/sdk1/src/unstract/sdk1/adapters/llm1/static/ollama.json b/unstract/sdk1/src/unstract/sdk1/adapters/llm1/static/ollama.json
index 3800814c77..3c8a4a5f16 100644
--- a/unstract/sdk1/src/unstract/sdk1/adapters/llm1/static/ollama.json
+++ b/unstract/sdk1/src/unstract/sdk1/adapters/llm1/static/ollama.json
@@ -48,6 +48,14 @@
       "default": 3900,
       "description": "The maximum number of context tokens for the model."
     },
+    "max_retries": {
+      "type": "number",
+      "minimum": 0,
+      "multipleOf": 1,
+      "title": "Max Retries",
+      "default": 3,
+      "description": "The maximum number of times to retry a request if it fails."
+    },
     "request_timeout": {
       "type": "number",
       "minimum": 0,
diff --git a/unstract/sdk1/src/unstract/sdk1/embedding.py b/unstract/sdk1/src/unstract/sdk1/embedding.py
index 0677473852..4e30c6201c 100644
--- a/unstract/sdk1/src/unstract/sdk1/embedding.py
+++ b/unstract/sdk1/src/unstract/sdk1/embedding.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import logging
 import os
 from typing import TYPE_CHECKING
 
@@ -14,10 +15,18 @@
 from unstract.sdk1.exceptions import SdkError, parse_litellm_err
 from unstract.sdk1.platform import PlatformHelper
 from unstract.sdk1.utils.callback_manager import CallbackManager
+from unstract.sdk1.utils.retry_utils import (
+    acall_with_retry,
+    call_with_retry,
+    is_retryable_litellm_error,
+    pop_litellm_retry_kwargs,
+)
 
 if TYPE_CHECKING:
     from unstract.sdk1.tool.base import BaseTool
 
+logger = logging.getLogger(__name__)
+
 litellm.drop_params = True
 
 
@@ -115,9 +124,14 @@ def get_embedding(self, text: str) -> list[float]:
         try:
             kwargs = self.kwargs.copy()
             model = kwargs.pop("model")
+            max_retries = pop_litellm_retry_kwargs(kwargs, self._get_adapter_info())
 
-            resp = litellm.embedding(model=model, input=[text], **kwargs)
-
+            resp = call_with_retry(
+                lambda: litellm.embedding(model=model, input=[text], **kwargs),
+                max_retries=max_retries,
+                retry_predicate=is_retryable_litellm_error,
+                description=self._get_adapter_info(),
+            )
             return resp["data"][0]["embedding"]
         except Exception as e:
             raise parse_litellm_err(e, self._get_adapter_info()) from e
@@ -127,9 +141,14 @@ def get_embeddings(self, texts: list[str]) -> list[list[float]]:
         try:
             kwargs = self.kwargs.copy()
             model = kwargs.pop("model")
+            max_retries = pop_litellm_retry_kwargs(kwargs, self._get_adapter_info())
 
-            resp = litellm.embedding(model=model, input=texts, **kwargs)
-
+            resp = call_with_retry(
+                lambda: litellm.embedding(model=model, input=texts, **kwargs),
+                max_retries=max_retries,
+                retry_predicate=is_retryable_litellm_error,
+                description=self._get_adapter_info(),
+            )
             return [data["embedding"] for data in resp["data"]]
         except Exception as e:
             raise parse_litellm_err(e, self._get_adapter_info()) from e
@@ -139,26 +158,34 @@ async def get_aembedding(self, text: str) -> list[float]:
         try:
             kwargs = self.kwargs.copy()
             model = kwargs.pop("model")
+            max_retries = pop_litellm_retry_kwargs(kwargs, self._get_adapter_info())
 
-            resp = await litellm.aembedding(model=model, input=[text], **kwargs)
-
+            resp = await acall_with_retry(
+                lambda: litellm.aembedding(model=model, input=[text], **kwargs),
+                max_retries=max_retries,
+                retry_predicate=is_retryable_litellm_error,
+                description=self._get_adapter_info(),
+            )
             return resp["data"][0]["embedding"]
         except Exception as e:
-            provider_name = f"{self.adapter.get_name()}"
-            raise parse_litellm_err(e, provider_name) from e
+            raise parse_litellm_err(e, self._get_adapter_info()) from e
 
     async def get_aembeddings(self, texts: list[str]) -> list[list[float]]:
         """Return async embedding vectors for list of query strings."""
         try:
             kwargs = self.kwargs.copy()
             model = kwargs.pop("model")
+            max_retries = pop_litellm_retry_kwargs(kwargs, self._get_adapter_info())
 
-            resp = await litellm.aembedding(model=model, input=texts, **kwargs)
-
+            resp = await acall_with_retry(
+                lambda: litellm.aembedding(model=model, input=texts, **kwargs),
+                max_retries=max_retries,
+                retry_predicate=is_retryable_litellm_error,
+                description=self._get_adapter_info(),
+            )
             return [data["embedding"] for data in resp["data"]]
         except Exception as e:
-            provider_name = f"{self.adapter.get_name()}"
-            raise parse_litellm_err(e, provider_name) from e
+            raise parse_litellm_err(e, self._get_adapter_info()) from e
 
     def test_connection(self) -> bool:
         """Test connection to the embedding provider."""
diff --git a/unstract/sdk1/src/unstract/sdk1/llm.py b/unstract/sdk1/src/unstract/sdk1/llm.py
index 8ff29a89d5..c1730e6613 100644
--- a/unstract/sdk1/src/unstract/sdk1/llm.py
+++ b/unstract/sdk1/src/unstract/sdk1/llm.py
@@ -24,6 +24,13 @@
     TokenCounterCompat,
     capture_metrics,
 )
+from unstract.sdk1.utils.retry_utils import (
+    acall_with_retry,
+    call_with_retry,
+    is_retryable_litellm_error,
+    iter_with_retry,
+    pop_litellm_retry_kwargs,
+)
 
 logger = logging.getLogger(__name__)
 
@@ -285,9 +292,14 @@ def complete(self, prompt: str, **kwargs: object) -> dict[str, object]:
             # if hasattr(self, "thinking_dict") and self.thinking_dict is not None:
             #     completion_kwargs["temperature"] = 1
 
-            response: dict[str, object] = litellm.completion(
-                messages=messages,
-                **completion_kwargs,
+            max_retries = pop_litellm_retry_kwargs(
+                completion_kwargs, self._get_adapter_info()
+            )
+            response: dict[str, object] = call_with_retry(
+                lambda: litellm.completion(messages=messages, **completion_kwargs),
+                max_retries=max_retries,
+                retry_predicate=is_retryable_litellm_error,
+                description=self._get_adapter_info(),
             )
 
             response_text = response["choices"][0]["message"]["content"]
@@ -373,14 +385,20 @@ def stream_complete(
             completion_kwargs = self.adapter.validate({**self.kwargs, **kwargs})
             completion_kwargs.pop("cost_model", None)
 
+            max_retries = pop_litellm_retry_kwargs(
+                completion_kwargs, self._get_adapter_info()
+            )
             has_yielded_content = False
-            for chunk in litellm.completion(
-                messages=messages,
-                stream=True,
-                stream_options={
-                    "include_usage": True,
-                },
-                **completion_kwargs,
+            for chunk in iter_with_retry(
+                lambda: litellm.completion(
+                    messages=messages,
+                    stream=True,
+                    stream_options={"include_usage": True},
+                    **completion_kwargs,
+                ),
+                max_retries=max_retries,
+                retry_predicate=is_retryable_litellm_error,
+                description=self._get_adapter_info(),
             ):
                 if chunk.get("usage"):
                     self._record_usage(
@@ -437,9 +455,14 @@ async def acomplete(self, prompt: str, **kwargs: object) -> dict[str, object]:
             completion_kwargs = self.adapter.validate({**self.kwargs, **kwargs})
             completion_kwargs.pop("cost_model", None)
 
-            response = await litellm.acompletion(
-                messages=messages,
-                **completion_kwargs,
+            max_retries = pop_litellm_retry_kwargs(
+                completion_kwargs, self._get_adapter_info()
+            )
+            response = await acall_with_retry(
+                lambda: litellm.acompletion(messages=messages, **completion_kwargs),
+                max_retries=max_retries,
+                retry_predicate=is_retryable_litellm_error,
+                description=self._get_adapter_info(),
             )
             response_text = response["choices"][0]["message"]["content"]
             finish_reason = response["choices"][0].get("finish_reason")
diff --git a/unstract/sdk1/src/unstract/sdk1/utils/retry_utils.py b/unstract/sdk1/src/unstract/sdk1/utils/retry_utils.py
index 9d1a1e4ec1..0a13331474 100644
--- a/unstract/sdk1/src/unstract/sdk1/utils/retry_utils.py
+++ b/unstract/sdk1/src/unstract/sdk1/utils/retry_utils.py
@@ -1,35 +1,289 @@
 """Generic retry utilities with custom exponential backoff implementation."""
 
+import asyncio
+import builtins
 import errno
 import logging
 import os
 import random
 import time
-from collections.abc import Callable
+from collections.abc import Awaitable, Callable, Generator, Iterable
 from functools import wraps
 from typing import Any
 
-from requests.exceptions import ConnectionError, HTTPError, Timeout
+from requests.exceptions import ConnectionError as RequestsConnectionError
+from requests.exceptions import HTTPError, Timeout
 
 logger = logging.getLogger(__name__)
 
+# HTTP status codes for transient failures worth retrying (408, 429 and common 5xx).
+RETRYABLE_STATUS_CODES = frozenset({408, 429, 500, 502, 503, 504})
+
+# Exception class names (from litellm, openai, httpx) that indicate transient
+# connection/timeout failures. Matched by class name rather than isinstance
+# so this utility module does not have to import litellm.
+_RETRYABLE_ERROR_NAMES = frozenset(
+    {
+        "APIConnectionError",
+        "APITimeoutError",
+        "Timeout",
+        "ConnectTimeout",
+        "ReadTimeout",
+    }
+)
 
-def is_retryable_error(error: Exception) -> bool:
-    """Check if an error is retryable.
 
-    Handles:
-    - ConnectionError and Timeout from requests
-    - HTTPError with status codes 502, 503, 504
-    - OSError with specific errno codes (ECONNREFUSED, ECONNRESET, etc.)
+def is_retryable_litellm_error(error: Exception) -> bool:
+    """Check if a litellm/provider API error should trigger a retry.
 
-    Args:
-        error: The exception to check
+    Distinct from is_retryable_error() which handles requests-library exceptions
+    (requests.ConnectionError, requests.HTTPError.response.status_code, OSError).
+    litellm/openai/httpx have a separate exception hierarchy: status_code lives
+    on the exception itself, and class names like APIConnectionError don't inherit
+    from the requests types. Uses duck-typing to avoid importing litellm directly.
+    """
+    # Python built-in connection / timeout base classes (not requests.ConnectionError)
+    if isinstance(error, builtins.ConnectionError | builtins.TimeoutError):
+        return True
 
-    Returns:
-        True if the error should trigger a retry
+    # litellm/openai/httpx exception types that don't inherit from the
+    # built-ins above but still represent transient network failures.
+    # Check MRO to also catch subclasses of these error types.
+    if any(cls.__name__ in _RETRYABLE_ERROR_NAMES for cls in type(error).__mro__):
+        return True
+
+    # Status-code check covers litellm.RateLimitError (429),
+    # InternalServerError (500), ServiceUnavailableError (503), etc.
+    status_code = getattr(error, "status_code", None)
+    if status_code is not None and status_code in RETRYABLE_STATUS_CODES:
+        return True
+
+    return False
+
+
+# ── Shared retry decision ───────────────────────────────────────────────────
+
+
+def _extract_retry_after(error: Exception) -> float | None:
+    """Return the server-supplied Retry-After delay in seconds, if usable.
+
+    Reads ``error.response.headers`` and parses the header as a number of
+    seconds, clamping negatives to 0. Returns None when the response, the
+    headers or the header is missing, or when the value is not a finite
+    number (HTTP-date form, "nan", "inf"), so the caller falls back to its
+    own backoff instead of handing time.sleep() a value it rejects.
+    """
+    headers = getattr(getattr(error, "response", None), "headers", None)
+    if headers is None:
+        return None
+    value = headers.get("retry-after") or headers.get("Retry-After")
+    if value is None:
+        return None
+    try:
+        seconds = float(value)
+    except (TypeError, ValueError):
+        return None
+    # NaN fails every comparison and +inf fails the upper bound: both rejected.
+    return max(seconds, 0.0) if seconds < float("inf") else None
+
+
+def _get_retry_delay(
+    error: Exception,
+    attempt: int,
+    max_retries: int,
+    retry_predicate: Callable[[Exception], bool] | None,
+    description: str,
+    logger_instance: logging.Logger,
+    base_delay: float = 1.0,
+    multiplier: float = 2.0,
+    max_delay: float = 60.0,
+    jitter: bool = True,
+) -> float | None:
+    """Decide whether to retry and, if so, how long to wait first.
+
+    ``attempt`` is the zero-based index of the attempt that just failed. A
+    retry is allowed while ``attempt < max_retries`` and ``retry_predicate``
+    (None means retry everything) accepts ``error``. The wait is the
+    provider's Retry-After when present, else calculate_delay(); either way
+    it never exceeds ``max_delay``.
+
+    Returns the wait in seconds, or None when the caller must re-raise.
+    """
+    should_retry = retry_predicate(error) if retry_predicate is not None else True
+
+    logger_instance.debug(
+        "Retry decision: attempt=%d/%d error=%s retryable=%s description=%s",
+        attempt + 1,
+        max_retries + 1,
+        type(error).__name__,
+        should_retry,
+        description,
+    )
+
+    if not should_retry or attempt >= max_retries:
+        # Log exhaustion only: retries were attempted (attempt > 0) and the
+        # error was still retryable, i.e. we simply ran out of attempts.
+        if attempt > 0 and should_retry:
+            logger_instance.exception(
+                "Giving up %s after %d attempt(s)", description, attempt + 1
+            )
+        return None
+
+    # Honor the provider's Retry-After hint, but cap it at max_delay so a
+    # huge or hostile header value cannot stall the caller indefinitely.
+    retry_after = _extract_retry_after(error)
+    if retry_after is not None:
+        delay = min(retry_after, max_delay)
+    else:
+        delay = calculate_delay(attempt, base_delay, multiplier, max_delay, jitter)
+    logger_instance.warning(
+        "Retry %d/%d for %s: %s (waiting %.1fs)",
+        attempt + 1,
+        max_retries,
+        description,
+        error,
+        delay,
+    )
+    return delay
+
+
+# ── Generic retry wrappers ──────────────────────────────────────────────────
+# Unlike the decorator-based retry_with_exponential_backoff (env-var configured,
+# sync-only), these accept max_retries at call time and support async + generators.
+# All delegate retry decisions to _get_retry_delay above.
+
+
+def _validate_max_retries(max_retries: int) -> None:
+    if max_retries < 0:  # 0 is allowed: a single attempt with no retries
+        raise ValueError(f"max_retries must be >= 0, got {max_retries}")
+
+
+def pop_litellm_retry_kwargs(kwargs: dict[str, Any], context: str = "") -> int:
+    """Pop max_retries from kwargs and disable litellm's built-in retries.
+
+    litellm has two separate retry mechanisms:
+    - max_retries: passed to the SDK client (OpenAI/Azure) as its
+      constructor arg — triggers SDK-level retries.
+    - num_retries: activates litellm's own completion_with_retries wrapper.
+
+    Both are zeroed in ``kwargs`` (in place) so the outer retry helpers
+    (call_with_retry etc.) are the single source of truth; for embeddings
+    litellm.drop_params=True drops num_retries=0, it is set for consistency.
+
+    Returns the configured max_retries (0 if unset) coerced to int: the adapter
+    schemas type it as "number", so 3.0 may arrive and range() needs an int.
+    """
+    max_retries = int(kwargs.pop("max_retries", None) or 0)
+    kwargs["max_retries"] = 0
+    kwargs["num_retries"] = 0
+    suffix = f" for {context}" if context else ""
+    logger.debug(
+        "Extracted max_retries=%d, disabled litellm retry "
+        "(max_retries=0, num_retries=0)%s",
+        max_retries,
+        suffix,
+    )
+    return max_retries
+
+
+def call_with_retry[T](
+    fn: Callable[[], T],
+    *,
+    max_retries: int,
+    retry_predicate: Callable[[Exception], bool],
+    description: str = "",
+    logger_instance: logging.Logger | None = None,
+) -> T:
+    """Return fn()'s result, re-invoking it while _get_retry_delay allows."""
+    _validate_max_retries(max_retries)
+    active_log = logger_instance or logger
+    for attempt in range(max_retries + 1):
+        try:
+            return fn()
+        except Exception as exc:
+            pause = _get_retry_delay(
+                exc, attempt, max_retries, retry_predicate, description, active_log
+            )
+            if pause is None:
+                raise
+            time.sleep(pause)
+    raise RuntimeError("unreachable")  # the last attempt always returns or re-raises
+
+
+async def acall_with_retry[T](
+    fn: Callable[[], Awaitable[T]],
+    *,
+    max_retries: int,
+    retry_predicate: Callable[[Exception], bool],
+    description: str = "",
+    logger_instance: logging.Logger | None = None,
+) -> T:
+    """Async twin of call_with_retry: awaits fn() and pauses via asyncio.sleep."""
+    _validate_max_retries(max_retries)
+    active_log = logger_instance or logger
+    for attempt in range(max_retries + 1):
+        try:
+            return await fn()
+        except Exception as exc:
+            pause = _get_retry_delay(
+                exc, attempt, max_retries, retry_predicate, description, active_log
+            )
+            if pause is None:
+                raise
+            await asyncio.sleep(pause)
+    raise RuntimeError("unreachable")  # the last attempt always returns or re-raises
+
+
+def iter_with_retry[T](
+    fn: Callable[[], Iterable[T]],
+    *,
+    max_retries: int,
+    retry_predicate: Callable[[Exception], bool],
+    description: str = "",
+    logger_instance: logging.Logger | None = None,
+) -> Generator[T, None, None]:
+    """Yield from fn() with retry. Only retries before the first yield.
+
+    Failures of fn() itself or before the first item are retried; once items
+    have been yielded a failure is re-raised (output can't be un-yielded).
+    """
+    _validate_max_retries(max_retries)
+    log = logger_instance or logger
+    for attempt in range(max_retries + 1):
+        has_yielded = False
+        gen = None
+        try:
+            gen = fn()  # inside the try: creating the stream can fail too
+            for item in gen:
+                has_yielded = True
+                yield item
+            return
+        except Exception as e:
+            # Close the stream (if one was created) to release in-flight
+            # HTTP/socket resources now rather than whenever GC runs.
+            close = getattr(gen, "close", None)
+            if callable(close):
+                close()
+            if has_yielded:
+                raise
+            delay = _get_retry_delay(
+                e, attempt, max_retries, retry_predicate, description, log
+            )
+            if delay is None:
+                raise
+            time.sleep(delay)
+
+
+def is_retryable_error(error: Exception) -> bool:
+    """Check if a requests-library HTTP error should trigger a retry.
+
+    For retrying internal service calls (platform-service, prompt-service) that
+    use the requests library. Distinct from is_retryable_litellm_error() which
+    handles litellm/openai/httpx exceptions with different class hierarchies
+    (e.g. error.status_code vs error.response.status_code).
     """
     # Requests connection and timeout errors
-    if isinstance(error, ConnectionError | Timeout):
+    if isinstance(error, RequestsConnectionError | Timeout):
         return True
 
     # HTTP errors with specific status codes
@@ -85,7 +339,57 @@ def calculate_delay(
     return min(delay, max_delay)
 
 
-def retry_with_exponential_backoff(  # noqa: C901
+def _invoke_with_retries(
+    func: Callable,
+    args: tuple,
+    kwargs: dict,
+    *,
+    max_retries: int,
+    base_delay: float,
+    multiplier: float,
+    jitter: bool,
+    exceptions: tuple[type[Exception], ...],
+    logger_instance: logging.Logger,
+    prefix: str,
+    retry_predicate: Callable[[Exception], bool] | None,
+) -> Any:  # noqa: ANN401
+    """Call func(*args, **kwargs), retrying failures caught by ``exceptions``.
+
+    Backs retry_with_exponential_backoff; see it for parameter semantics.
+    """
+    for attempt in range(max_retries + 1):
+        try:
+            result = func(*args, **kwargs)
+        except exceptions as e:
+            delay = _get_retry_delay(
+                e,
+                attempt,
+                max_retries,
+                retry_predicate,
+                prefix,
+                logger_instance,
+                base_delay,
+                multiplier,
+                60.0,  # max_delay argument of _get_retry_delay (seconds)
+                jitter,
+            )
+            if delay is not None:
+                time.sleep(delay)
+                continue
+            # Give-up log is emitted inside _get_retry_delay so all retry
+            # helpers share the same exhaustion signal.
+            raise
+        if attempt > 0:
+            logger_instance.info(
+                "Successfully completed '%s' after %d retry attempt(s)",
+                func.__name__,
+                attempt,
+            )
+        return result
+    return None  # only reached when max_retries < 0 (empty range): func never runs
+
+
+def retry_with_exponential_backoff(
     max_retries: int,
     base_delay: float,
     multiplier: float,
@@ -111,71 +415,22 @@ def retry_with_exponential_backoff(  # noqa: C901
         Decorator function
     """
 
-    def decorator(func: Callable) -> Callable:  # noqa: C901
+    def decorator(func: Callable) -> Callable:
         @wraps(func)
-        def wrapper(*args: Any, **kwargs: Any) -> Any:  # noqa: C901, ANN401
-            last_exception = None
-
-            for attempt in range(max_retries + 1):  # +1 for initial attempt
-                try:
-                    # Try to execute the function
-                    result = func(*args, **kwargs)
-
-                    # If successful and we had retried, log success
-                    if attempt > 0:
-                        logger_instance.info(
-                            "Successfully completed '%s' after %d retry attempt(s)",
-                            func.__name__,
-                            attempt,
-                        )
-
-                    return result
-
-                except exceptions as e:
-                    last_exception = e
-
-                    # Check if the error should trigger a retry
-                    # First check if it's in the allowed exception types (already caught)
-                    # Then check using the predicate if provided
-                    should_retry = True
-                    if retry_predicate is not None:
-                        should_retry = retry_predicate(e)
-
-                    # If not retryable or last attempt, raise the error
-                    if not should_retry or attempt == max_retries:
-                        if attempt > 0:
-                            logger_instance.exception(
-                                "Giving up '%s' after %d attempt(s) for %s",
-                                func.__name__,
-                                attempt + 1,
-                                prefix,
-                            )
-                        raise
-
-                    # Calculate delay for next retry (capped at 60s)
-                    delay = calculate_delay(attempt, base_delay, multiplier, 60.0, jitter)
-
-                    # Log retry attempt
-                    logger_instance.warning(
-                        "Retry %d/%d for %s: %s (waiting %.1fs)",
-                        attempt + 1,
-                        max_retries,
-                        prefix,
-                        e,
-                        delay,
-                    )
-
-                    # Wait before retrying
-                    time.sleep(delay)
-
-                except Exception as e:
-                    # Exception not in the exceptions tuple - don't retry
-                    last_exception = e
-                    raise
-
-            # This should never be reached, but just in case
-            if last_exception:
-                raise last_exception
+        def wrapper(*args: Any, **kwargs: Any) -> Any:  # noqa: ANN401
+            return _invoke_with_retries(
+                func,
+                args,
+                kwargs,
+                max_retries=max_retries,
+                base_delay=base_delay,
+                multiplier=multiplier,
+                jitter=jitter,
+                exceptions=exceptions,
+                logger_instance=logger_instance,
+                prefix=prefix,
+                retry_predicate=retry_predicate,
+            )
 
         return wrapper
 
@@ -193,7 +448,7 @@ def create_retry_decorator(
     Args:
         prefix: Environment variable prefix for configuration
         exceptions: Tuple of exception types to retry on.
-                   Defaults to (ConnectionError, HTTPError, Timeout, OSError)
+                   Defaults to (RequestsConnectionError, HTTPError, Timeout, OSError)
         retry_predicate: Optional callable to determine if exception should trigger retry.
                         If only exceptions list provided, retry on those exceptions.
                         If only predicate provided, use predicate (catch all exceptions).
@@ -212,7 +467,7 @@ def create_retry_decorator(
     # Handle different combinations of exceptions and predicate
     if exceptions is None and retry_predicate is None:
         # Default case: use specific exceptions with is_retryable_error predicate
-        exceptions = (ConnectionError, HTTPError, Timeout, OSError)
+        exceptions = (RequestsConnectionError, HTTPError, Timeout, OSError)
         retry_predicate = is_retryable_error
     elif exceptions is None and retry_predicate is not None:
         # Only predicate provided: catch all exceptions and use predicate