From d00f263568289fd5905f359245b3d12c29666530 Mon Sep 17 00:00:00 2001 From: Jacob Ioffe Date: Tue, 26 May 2026 19:58:34 +0000 Subject: [PATCH 1/2] Fail loudly on local embedding failures --- .../src/nemo_retriever/text_embed/runtime.py | 38 ++++++++++- .../tests/test_text_embed_runtime.py | 67 +++++++++++++++++++ 2 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 nemo_retriever/tests/test_text_embed_runtime.py diff --git a/nemo_retriever/src/nemo_retriever/text_embed/runtime.py b/nemo_retriever/src/nemo_retriever/text_embed/runtime.py index de3e3d5e21..b513127af8 100644 --- a/nemo_retriever/src/nemo_retriever/text_embed/runtime.py +++ b/nemo_retriever/src/nemo_retriever/text_embed/runtime.py @@ -16,7 +16,26 @@ from nemo_retriever.nim.error_reporter import report_error from nemo_retriever.model import VL_EMBED_MODEL, resolve_embed_model from nemo_retriever.params.models import IMAGE_MODALITIES -from nemo_retriever.text_embed.main_text_embed import TextEmbeddingConfig, create_text_embeddings_for_df +from nemo_retriever.text_embed.main_text_embed import ( + TextEmbeddingConfig, + create_text_embeddings_for_df, + _image_from_row, + _text_from_row, +) + + +def _is_local_embed(endpoint: Optional[str], model: Any) -> bool: + return endpoint is None and model is not None + + +def _row_requires_embedding(row: pd.Series, *, embed_modality: str, text_column: str) -> bool: + row_modality = row.get("_embed_modality", embed_modality) + modality = str(row_modality or embed_modality) + if modality == "image": + return _image_from_row(row) is not None + if modality == "text_image": + return _text_from_row(row, text_column=text_column) is not None or _image_from_row(row) is not None + return _text_from_row(row, text_column=text_column) is not None def _embed_group( @@ -184,6 +203,8 @@ def embed_text_main_text_embed( logger.debug("torch.cuda.empty_cache() failed during error cleanup: %s", _cache_exc) logger.error("Embedding failed: %s: %s", type(exc).__name__, exc, exc_info=True) report_error("embed", exc) + if _is_local_embed(endpoint, model): + raise out_df = batch_df.copy() out_df[output_column] = [{"embedding": [], "error": str(exc)}] * len(out_df) out_df[embedding_dim_column] = 0 @@ -211,6 +232,21 @@ def dim(row: pd.Series) -> int: out_df[has_embedding_column] = [bool(int(d) > 0) for d in out_df[embedding_dim_column].tolist()] + if _is_local_embed(endpoint, model): + required_mask = out_df.apply( + _row_requires_embedding, + embed_modality=embed_modality, + text_column=text_column, + axis=1, + ) + missing_mask = required_mask & ~out_df[has_embedding_column].map(bool) + if bool(missing_mask.any()): + missing_count = int(missing_mask.sum()) + raise RuntimeError( + "Local embedding did not produce vectors for " + f"{missing_count} row(s); aborting instead of returning rows without embeddings." + ) + embedded_flags = out_df[has_embedding_column].tolist() out_df["embedding_v1_num_detections"] = [int(f) for f in embedded_flags] out_df["embedding_v1_counts_by_label"] = [{"embedded": 1} if f else {} for f in embedded_flags] diff --git a/nemo_retriever/tests/test_text_embed_runtime.py b/nemo_retriever/tests/test_text_embed_runtime.py new file mode 100644 index 0000000000..abc4e018e9 --- /dev/null +++ b/nemo_retriever/tests/test_text_embed_runtime.py @@ -0,0 +1,67 @@ +# SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +import pandas as pd +import pytest + +from nemo_retriever.text_embed import runtime + + +@pytest.fixture(autouse=True) +def _clear_reported_errors(): + from nemo_retriever.nim.error_reporter import drain_errors + + drain_errors() + yield + drain_errors() + + +class _FailingEmbedder: + def embed(self, texts, batch_size: int): + raise RuntimeError("backend unavailable") + + +class _EmptyEmbedder: + def embed(self, texts, batch_size: int): + return [[] for _ in texts] + + +def _text_df() -> pd.DataFrame: + return pd.DataFrame({"text": ["hello"], "metadata": [{}]}) + + +def test_local_embed_failure_raises_instead_of_returning_empty_rows(): + with pytest.raises(RuntimeError, match="backend unavailable"): + runtime.embed_text_main_text_embed(_text_df(), model=_FailingEmbedder()) + + +def test_local_empty_embedding_for_valid_row_raises(): + with pytest.raises(RuntimeError, match="Local embedding did not produce vectors"): + runtime.embed_text_main_text_embed(_text_df(), model=_EmptyEmbedder()) + + +def test_blank_local_text_rows_can_pass_without_embeddings(): + df = pd.DataFrame({"text": [" "], "metadata": [{}]}) + result = runtime.embed_text_main_text_embed(df, model=_EmptyEmbedder()) + + assert result.iloc[0]["text_embeddings_1b_v2_dim"] == 0 + assert not bool(result.iloc[0]["text_embeddings_1b_v2_has_embedding"]) + + +def test_remote_embed_failure_preserves_error_payload(monkeypatch): + def _raise_embed_group(*args, **kwargs): + raise RuntimeError("remote unavailable") + + monkeypatch.setattr(runtime, "_embed_group", _raise_embed_group) + + result = runtime.embed_text_main_text_embed(_text_df(), embedding_endpoint="http://embed.example/v1") + + assert result.iloc[0]["text_embeddings_1b_v2"] == { + "embedding": [], + "error": "remote unavailable", + } + assert result.iloc[0]["text_embeddings_1b_v2_dim"] == 0 + assert not bool(result.iloc[0]["text_embeddings_1b_v2_has_embedding"]) From 497a7946f14bbaedd87340c45d6141fdb50ac31b Mon Sep 17 00:00:00 2001 From: Jacob Ioffe Date: Tue, 26 May 2026 22:25:25 +0000 Subject: [PATCH 2/2] Narrow local embed failure handling --- .../src/nemo_retriever/text_embed/runtime.py | 32 +------------------ .../tests/test_text_embed_runtime.py | 10 ++---- 2 files changed, 3 insertions(+), 39 deletions(-) diff --git a/nemo_retriever/src/nemo_retriever/text_embed/runtime.py b/nemo_retriever/src/nemo_retriever/text_embed/runtime.py index b513127af8..62d65c90fa 100644 --- a/nemo_retriever/src/nemo_retriever/text_embed/runtime.py +++ b/nemo_retriever/src/nemo_retriever/text_embed/runtime.py @@ -16,28 +16,13 @@ from nemo_retriever.nim.error_reporter import report_error from nemo_retriever.model import VL_EMBED_MODEL, resolve_embed_model from nemo_retriever.params.models import IMAGE_MODALITIES -from nemo_retriever.text_embed.main_text_embed import ( - TextEmbeddingConfig, - create_text_embeddings_for_df, - _image_from_row, - _text_from_row, -) +from nemo_retriever.text_embed.main_text_embed import TextEmbeddingConfig, create_text_embeddings_for_df def _is_local_embed(endpoint: Optional[str], model: Any) -> bool: return endpoint is None and model is not None -def _row_requires_embedding(row: pd.Series, *, embed_modality: str, text_column: str) -> bool: - row_modality = row.get("_embed_modality", embed_modality) - modality = str(row_modality or embed_modality) - if modality == "image": - return _image_from_row(row) is not None - if modality == "text_image": - return _text_from_row(row, text_column=text_column) is not None or _image_from_row(row) is not None - return _text_from_row(row, text_column=text_column) is not None - - def _embed_group( group_df: pd.DataFrame, *, @@ -232,21 +217,6 @@ def dim(row: pd.Series) -> int: out_df[has_embedding_column] = [bool(int(d) > 0) for d in out_df[embedding_dim_column].tolist()] - if _is_local_embed(endpoint, model): - required_mask = out_df.apply( - _row_requires_embedding, - embed_modality=embed_modality, - text_column=text_column, - axis=1, - ) - missing_mask = required_mask & ~out_df[has_embedding_column].map(bool) - if bool(missing_mask.any()): - missing_count = int(missing_mask.sum()) - raise RuntimeError( - "Local embedding did not produce vectors for " - f"{missing_count} row(s); aborting instead of returning rows without embeddings." - ) - embedded_flags = out_df[has_embedding_column].tolist() out_df["embedding_v1_num_detections"] = [int(f) for f in embedded_flags] out_df["embedding_v1_counts_by_label"] = [{"embedded": 1} if f else {} for f in embedded_flags] diff --git a/nemo_retriever/tests/test_text_embed_runtime.py b/nemo_retriever/tests/test_text_embed_runtime.py index abc4e018e9..d6075e2f39 100644 --- a/nemo_retriever/tests/test_text_embed_runtime.py +++ b/nemo_retriever/tests/test_text_embed_runtime.py @@ -38,14 +38,8 @@ def test_local_embed_failure_raises_instead_of_returning_empty_rows(): runtime.embed_text_main_text_embed(_text_df(), model=_FailingEmbedder()) -def test_local_empty_embedding_for_valid_row_raises(): - with pytest.raises(RuntimeError, match="Local embedding did not produce vectors"): - runtime.embed_text_main_text_embed(_text_df(), model=_EmptyEmbedder()) - - -def test_blank_local_text_rows_can_pass_without_embeddings(): - df = pd.DataFrame({"text": [" "], "metadata": [{}]}) - result = runtime.embed_text_main_text_embed(df, model=_EmptyEmbedder()) +def test_local_empty_embedding_result_is_reported_without_breaking_batch(): + result = runtime.embed_text_main_text_embed(_text_df(), model=_EmptyEmbedder()) assert result.iloc[0]["text_embeddings_1b_v2_dim"] == 0 assert not bool(result.iloc[0]["text_embeddings_1b_v2_has_embedding"])