Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions EMBEDDINGS.md
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,18 @@ envs:
VOYAGE_API_KEY: your-api-key-here
```

To access Voyage models through [MongoDB Atlas](https://www.mongodb.com/docs/voyageai/api-and-clients/),
use the `mongodb/` prefix in place of `voyage/`. Requests are then routed to
`https://ai.mongodb.com/v1` using your Atlas model API key, which is still read from `VOYAGE_API_KEY`:

```yaml
embedding:
provider: litellm
model: mongodb/voyage-4-large
envs:
VOYAGE_API_KEY: your-atlas-model-api-key
```

For more information, see the [LiteLLM Providers Documentation](https://docs.litellm.ai/docs/providers).

[Back to top](#table-of-contents)
Expand Down
3 changes: 2 additions & 1 deletion src/cocoindex_code/embedder_defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ class DefaultParamsEntry(NamedTuple):
),
DefaultParamsEntry(
"litellm",
re.compile(r"voyage/.+"),
# ``mongodb/`` routes Voyage models through MongoDB Atlas; same params.
re.compile(r"(voyage|mongodb)/.+"),
{"input_type": "document"},
{"input_type": "query"},
),
Expand Down
23 changes: 23 additions & 0 deletions src/cocoindex_code/litellm_embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,23 @@
_RATE_LIMIT_DELAY_RE = re.compile(r"Please try again in ([0-9.]+)(ms|s)", re.IGNORECASE)
_MAX_RATE_LIMIT_RETRIES = 6

# MongoDB Atlas serves Voyage AI embedding models through an endpoint that
# speaks the native Voyage API, so a ``mongodb/<voyage-model>`` model name is
# routed via LiteLLM's ``voyage/`` provider with ``api_base`` pointed at Atlas.
_MONGODB_PREFIX = "mongodb/"
_MONGODB_API_BASE = "https://ai.mongodb.com/v1"


def _resolve_mongodb_model(model: str) -> tuple[str | None, str]:
"""Map a ``mongodb/`` model name to a Voyage model plus the Atlas base URL.

Returns ``(api_base, model)``. For non-MongoDB models ``api_base`` is ``None``
and the model is returned unchanged.
"""
if model.startswith(_MONGODB_PREFIX):
return _MONGODB_API_BASE, "voyage/" + model[len(_MONGODB_PREFIX) :]
return None, model


def _get_rate_limit_delay(exc: Exception, attempt: int) -> float | None:
message = str(exc)
Expand All @@ -39,7 +56,9 @@ class PacedLiteLLMEmbedder(LiteLLMEmbedder):
"""LiteLLM embedder that serializes requests and paces them when configured."""

def __init__(self, model: str, *, min_interval_ms: int | None = None, **kwargs: Any) -> None:
api_base, model = _resolve_mongodb_model(model)
super().__init__(model, **kwargs)
self._api_base = api_base
self._min_request_interval_seconds = max(0.0, float(min_interval_ms or 0) / 1000.0)
self._request_lock: asyncio.Lock | None = None
self._next_request_at: float = 0.0
Expand Down Expand Up @@ -85,6 +104,10 @@ async def run_embedding_request(self, *, input: list[str], **kwargs: Any) -> Any
if not self._model.startswith(("voyage/", "bedrock/")):
kwargs["encoding_format"] = "float"
kwargs["drop_params"] = True

if self._api_base is not None:
kwargs.setdefault("api_base", self._api_base)

response = await self._aembedding_with_rate_limit_retries(
model=self._model,
input=input,
Expand Down
6 changes: 6 additions & 0 deletions tests/test_embedder_defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,12 @@ def test_lookup_defaults_regex_match_voyage() -> None:
assert query == {"input_type": "query"}


def test_lookup_defaults_regex_match_mongodb_voyage() -> None:
    """A ``mongodb/`` LiteLLM model gets the document/query ``input_type`` defaults."""
    index_params, query_params = lookup_defaults("litellm", "mongodb/voyage-4")

    # Indexing embeds documents; querying embeds queries.
    assert index_params == {"input_type": "document"}
    assert query_params == {"input_type": "query"}


def test_lookup_defaults_regex_match_cohere() -> None:
indexing, query = lookup_defaults("litellm", "cohere/embed-english-v3.0")
assert indexing == {"input_type": "search_document"}
Expand Down
24 changes: 24 additions & 0 deletions tests/test_litellm_embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,27 @@ async def fake_aembedding(*, model: str, input: list[str], **kwargs: Any) -> Any

assert "encoding_format" not in seen_kwargs
assert "drop_params" not in seen_kwargs


@pytest.mark.asyncio
async def test_mongodb_model_routes_to_voyage_with_atlas_api_base(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A ``mongodb/`` model is sent to LiteLLM as ``voyage/`` with the Atlas ``api_base``."""
    # Each intercepted call is recorded as ``(model, extra keyword arguments)``.
    calls: list[tuple[str, dict[str, Any]]] = []

    async def fake_aembedding(*, model: str, input: list[str], **kwargs: Any) -> Any:
        calls.append((model, kwargs))
        return SimpleNamespace(data=[{"embedding": [1.0, 2.0]}])

    monkeypatch.setattr("cocoindex_code.litellm_embedder.litellm.aembedding", fake_aembedding)

    embedder = PacedLiteLLMEmbedder("mongodb/voyage-4")
    await embedder.run_embedding_request(input=["hello"])

    # Exactly one request, addressed to the Voyage provider.
    assert [called_model for called_model, _ in calls] == ["voyage/voyage-4"]
    forwarded = calls[0][1]
    assert forwarded["api_base"] == "https://ai.mongodb.com/v1"
    # Routed as a native Voyage provider, so encoding_format is not forced.
    assert "encoding_format" not in forwarded
    assert "drop_params" not in forwarded
Loading