diff --git a/EMBEDDINGS.md b/EMBEDDINGS.md index bccdc72..dc86de2 100644 --- a/EMBEDDINGS.md +++ b/EMBEDDINGS.md @@ -135,6 +135,18 @@ envs: VOYAGE_API_KEY: your-api-key-here ``` +To access Voyage models through [MongoDB Atlas](https://www.mongodb.com/docs/voyageai/api-and-clients/), +use the `mongodb/` prefix. Requests are routed to `https://ai.mongodb.com/v1` using your +Atlas model API key (still read from `VOYAGE_API_KEY`): + +```yaml +embedding: + provider: litellm + model: mongodb/voyage-4-large +envs: + VOYAGE_API_KEY: your-atlas-model-api-key +``` + For more information, see the [LiteLLM Providers Documentation](https://docs.litellm.ai/docs/providers). [Back to top](#table-of-contents) diff --git a/src/cocoindex_code/embedder_defaults.py b/src/cocoindex_code/embedder_defaults.py index 0ed52b9..f75c7a5 100644 --- a/src/cocoindex_code/embedder_defaults.py +++ b/src/cocoindex_code/embedder_defaults.py @@ -80,7 +80,8 @@ class DefaultParamsEntry(NamedTuple): ), DefaultParamsEntry( "litellm", - re.compile(r"voyage/.+"), + # ``mongodb/`` routes Voyage models through MongoDB Atlas; same params. + re.compile(r"(voyage|mongodb)/.+"), {"input_type": "document"}, {"input_type": "query"}, ), diff --git a/src/cocoindex_code/litellm_embedder.py b/src/cocoindex_code/litellm_embedder.py index 63083f0..0a9d790 100644 --- a/src/cocoindex_code/litellm_embedder.py +++ b/src/cocoindex_code/litellm_embedder.py @@ -18,6 +18,23 @@ _RATE_LIMIT_DELAY_RE = re.compile(r"Please try again in ([0-9.]+)(ms|s)", re.IGNORECASE) _MAX_RATE_LIMIT_RETRIES = 6 +# MongoDB Atlas serves Voyage AI embedding models through an endpoint that +# speaks the native Voyage API, so a ``mongodb/`` model name is +# routed via LiteLLM's ``voyage/`` provider with ``api_base`` pointed at Atlas. +_MONGODB_PREFIX = "mongodb/" +_MONGODB_API_BASE = "https://ai.mongodb.com/v1" + + +def _resolve_mongodb_model(model: str) -> tuple[str | None, str]: + """Map a ``mongodb/`` model name to a Voyage model plus the Atlas base URL. + + Returns ``(api_base, model)``. For non-MongoDB models ``api_base`` is ``None`` + and the model is returned unchanged. + """ + if model.startswith(_MONGODB_PREFIX): + return _MONGODB_API_BASE, "voyage/" + model[len(_MONGODB_PREFIX) :] + return None, model + def _get_rate_limit_delay(exc: Exception, attempt: int) -> float | None: message = str(exc) @@ -39,7 +56,9 @@ class PacedLiteLLMEmbedder(LiteLLMEmbedder): """LiteLLM embedder that serializes requests and paces them when configured.""" def __init__(self, model: str, *, min_interval_ms: int | None = None, **kwargs: Any) -> None: + api_base, model = _resolve_mongodb_model(model) super().__init__(model, **kwargs) + self._api_base = api_base self._min_request_interval_seconds = max(0.0, float(min_interval_ms or 0) / 1000.0) self._request_lock: asyncio.Lock | None = None self._next_request_at: float = 0.0 @@ -85,6 +104,10 @@ async def run_embedding_request(self, *, input: list[str], **kwargs: Any) -> Any if not self._model.startswith(("voyage/", "bedrock/")): kwargs["encoding_format"] = "float" kwargs["drop_params"] = True + + if self._api_base is not None: + kwargs.setdefault("api_base", self._api_base) + response = await self._aembedding_with_rate_limit_retries( model=self._model, input=input, diff --git a/tests/test_embedder_defaults.py b/tests/test_embedder_defaults.py index 425dbf6..6598c0b 100644 --- a/tests/test_embedder_defaults.py +++ b/tests/test_embedder_defaults.py @@ -35,6 +35,12 @@ def test_lookup_defaults_regex_match_voyage() -> None: assert query == {"input_type": "query"} +def test_lookup_defaults_regex_match_mongodb_voyage() -> None: + indexing, query = lookup_defaults("litellm", "mongodb/voyage-4") + assert indexing == {"input_type": "document"} + assert query == {"input_type": "query"} + + def test_lookup_defaults_regex_match_cohere() -> None: indexing, query = lookup_defaults("litellm", "cohere/embed-english-v3.0") assert indexing == {"input_type": "search_document"} diff --git a/tests/test_litellm_embedder.py b/tests/test_litellm_embedder.py index 17b7375..2f608e8 100644 --- a/tests/test_litellm_embedder.py +++ b/tests/test_litellm_embedder.py @@ -94,3 +94,27 @@ async def fake_aembedding(*, model: str, input: list[str], **kwargs: Any) -> Any assert "encoding_format" not in seen_kwargs assert "drop_params" not in seen_kwargs + + +@pytest.mark.asyncio +async def test_mongodb_model_routes_to_voyage_with_atlas_api_base( + monkeypatch: pytest.MonkeyPatch, +) -> None: + seen_model: list[str] = [] + seen_kwargs: dict[str, Any] = {} + + async def fake_aembedding(*, model: str, input: list[str], **kwargs: Any) -> Any: + seen_model.append(model) + seen_kwargs.update(kwargs) + return SimpleNamespace(data=[{"embedding": [1.0, 2.0]}]) + + monkeypatch.setattr("cocoindex_code.litellm_embedder.litellm.aembedding", fake_aembedding) + + embedder = PacedLiteLLMEmbedder("mongodb/voyage-4") + await embedder.run_embedding_request(input=["hello"]) + + assert seen_model == ["voyage/voyage-4"] + assert seen_kwargs["api_base"] == "https://ai.mongodb.com/v1" + # Routed as a native Voyage provider, so encoding_format is not forced. + assert "encoding_format" not in seen_kwargs + assert "drop_params" not in seen_kwargs