From d5a6ef1d215c1040501a9320a0895a8de2c6e7b7 Mon Sep 17 00:00:00 2001 From: Ian Hunt-Isaak Date: Fri, 12 Dec 2025 11:17:48 -0500 Subject: [PATCH 1/5] add drop_existing kwarg to set_xindex --- doc/internals/how-to-create-custom-index.rst | 9 +++----- doc/whats-new.rst | 4 ++++ xarray/core/dataarray.py | 8 ++++++- xarray/core/dataset.py | 13 +++++++++--- xarray/tests/test_dataarray.py | 6 ++++++ xarray/tests/test_dataset.py | 22 ++++++++++++++++++++ 6 files changed, 52 insertions(+), 10 deletions(-) diff --git a/doc/internals/how-to-create-custom-index.rst b/doc/internals/how-to-create-custom-index.rst index 351694fc62d..4621e052539 100644 --- a/doc/internals/how-to-create-custom-index.rst +++ b/doc/internals/how-to-create-custom-index.rst @@ -224,12 +224,9 @@ custom index to a Dataset or DataArray, e.g., using the ``RasterIndex`` above: dims=("y", "x"), ) - # Xarray create default indexes for the 'x' and 'y' coordinates - # we first need to explicitly drop it - da = da.drop_indexes(["x", "y"]) - - # Build a RasterIndex from the 'x' and 'y' coordinates - da_raster = da.set_xindex(["x", "y"], RasterIndex) + # Xarray creates default indexes for the 'x' and 'y' coordinates + # Use drop_existing=True to replace them with a custom index + da_raster = da.set_xindex(["x", "y"], RasterIndex, drop_existing=True) # RasterIndex now takes care of label-based selection selected = da_raster.sel(x=10, y=slice(20, 50)) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 89e9fbef56f..4bdafa3eb82 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -14,6 +14,10 @@ v2025.12.1 (unreleased) New Features ~~~~~~~~~~~~ +- Added ``drop_existing`` parameter to :py:meth:`Dataset.set_xindex` and + :py:meth:`DataArray.set_xindex` to allow replacing existing indexes without + needing to call :py:meth:`drop_indexes` first (:pull:`XXXX`). + By `Ian Hunt-Isaak `_. 
Breaking Changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 71d427d3db9..d1fc3796421 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2866,6 +2866,7 @@ def set_xindex( self, coord_names: str | Sequence[Hashable], index_cls: type[Index] | None = None, + drop_existing: bool = False, **options, ) -> Self: """Set a new, Xarray-compatible index from one or more existing @@ -2879,6 +2880,9 @@ def set_xindex( index_cls : subclass of :class:`~xarray.indexes.Index` The type of index to create. By default, try setting a pandas (multi-)index from the supplied coordinates. + drop_existing : bool + Whether to drop indexes on any existing coord_names if one + is present. **options Options passed to the index constructor. @@ -2888,7 +2892,9 @@ def set_xindex( Another dataarray, with this dataarray's data and with a new index. """ - ds = self._to_temp_dataset().set_xindex(coord_names, index_cls, **options) + ds = self._to_temp_dataset().set_xindex( + coord_names, index_cls, drop_existing, **options + ) return self._from_temp_dataset(ds) def reorder_levels( diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index bce048048da..9f6a32e8352 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4956,6 +4956,7 @@ def set_xindex( self, coord_names: str | Sequence[Hashable], index_cls: type[Index] | None = None, + drop_existing: bool = False, **options, ) -> Self: """Set a new, Xarray-compatible index from one or more existing @@ -4970,6 +4971,9 @@ def set_xindex( The type of index to create. By default, try setting a ``PandasIndex`` if ``len(coord_names) == 1``, otherwise a ``PandasMultiIndex``. + drop_existing : bool + Whether to drop indexes on any existing coord_names if one + is present **options Options passed to the index constructor. 
@@ -5010,9 +5014,12 @@ def set_xindex( indexed_coords = set(coord_names) & set(self._indexes) if indexed_coords: - raise ValueError( - f"those coordinates already have an index: {indexed_coords}" - ) + if drop_existing: + self.drop_indexes(indexed_coords) + else: + raise ValueError( + f"those coordinates already have an index: {indexed_coords}" + ) coord_vars = {name: self._variables[name] for name in coord_names} diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 7ae36421e14..5ea327f5ece 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2418,6 +2418,12 @@ def from_variables(cls, variables, options): assert "foo" in indexed.xindexes assert indexed.xindexes["foo"].opt == 1 # type: ignore[attr-defined] + def test_set_xindex_drop_existing(self) -> None: + # Basic test that drop_existing parameter is passed through to Dataset + da = DataArray([1, 2, 3, 4], coords={"x": ("x", [0, 1, 2, 3])}, dims="x") + result = da.set_xindex("x", PandasIndex, drop_existing=True) + assert "x" in result.xindexes + def test_dataset_getitem(self) -> None: dv = self.ds["foo"] assert_identical(dv, self.dv) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 6dce32aeb5c..e87cb45d631 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4000,6 +4000,28 @@ class NotAnIndex: ... 
with pytest.raises(ValueError, match="those coordinates already have an index"): ds2.set_xindex("x", PandasIndex) + def test_set_xindex_drop_existing(self) -> None: + # Test that drop_existing=True allows replacing an existing index + # (the default drop_existing=False raising ValueError is tested in test_set_xindex) + ds = Dataset(coords={"x": ("x", [0, 1, 2, 3])}) + + # With drop_existing=True, it should succeed + result = ds.set_xindex("x", PandasIndex, drop_existing=True) + assert "x" in result.xindexes + assert isinstance(result.xindexes["x"], PandasIndex) + + # Test that drop_existing=True replaces with a custom index + class CustomIndex(PandasIndex): + pass + + result_custom = ds.set_xindex("x", CustomIndex, drop_existing=True) + assert "x" in result_custom.xindexes + assert isinstance(result_custom.xindexes["x"], CustomIndex) + + # Verify the result is equivalent to drop_indexes + set_xindex + expected = ds.drop_indexes("x").set_xindex("x", CustomIndex) + assert_identical(result_custom, expected) + def test_set_xindex_options(self) -> None: ds = Dataset(coords={"foo": ("x", ["a", "a", "b", "b"])}) From 52922374255ad97bc68be8735779d4a67c597bbd Mon Sep 17 00:00:00 2001 From: Ian Hunt-Isaak Date: Fri, 12 Dec 2025 11:19:50 -0500 Subject: [PATCH 2/5] keep important part of comment --- doc/internals/how-to-create-custom-index.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/internals/how-to-create-custom-index.rst b/doc/internals/how-to-create-custom-index.rst index 4621e052539..dbc24b6fb54 100644 --- a/doc/internals/how-to-create-custom-index.rst +++ b/doc/internals/how-to-create-custom-index.rst @@ -224,6 +224,7 @@ custom index to a Dataset or DataArray, e.g., using the ``RasterIndex`` above: dims=("y", "x"), ) + # Build a RasterIndex from the 'x' and 'y' coordinates # Xarray creates default indexes for the 'x' and 'y' coordinates # Use drop_existing=True to replace them with a custom index da_raster = da.set_xindex(["x", "y"], RasterIndex, 
drop_existing=True) From 3ffb62731a8477cfb12d17fc8195cac16dd505b1 Mon Sep 17 00:00:00 2001 From: Ian Hunt-Isaak Date: Fri, 12 Dec 2025 11:20:27 -0500 Subject: [PATCH 3/5] PR number --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 4bdafa3eb82..b0a42e81e06 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -16,7 +16,7 @@ New Features - Added ``drop_existing`` parameter to :py:meth:`Dataset.set_xindex` and :py:meth:`DataArray.set_xindex` to allow replacing existing indexes without - needing to call :py:meth:`drop_indexes` first (:pull:`XXXX`). + needing to call :py:meth:`drop_indexes` first (:pull:`11008`). By `Ian Hunt-Isaak `_. Breaking Changes From f066226cb2e7f99c940ca177e95f51fa801b493d Mon Sep 17 00:00:00 2001 From: Ian Hunt-Isaak Date: Wed, 17 Dec 2025 10:43:25 -0500 Subject: [PATCH 4/5] no kwarg - always drop --- doc/internals/how-to-create-custom-index.rst | 4 ++-- doc/whats-new.rst | 6 +++--- xarray/core/dataarray.py | 10 ++-------- xarray/core/dataset.py | 18 +----------------- xarray/tests/test_dataarray.py | 3 +-- xarray/tests/test_dataset.py | 7 ++----- 6 files changed, 11 insertions(+), 37 deletions(-) diff --git a/doc/internals/how-to-create-custom-index.rst b/doc/internals/how-to-create-custom-index.rst index dbc24b6fb54..2002915ac84 100644 --- a/doc/internals/how-to-create-custom-index.rst +++ b/doc/internals/how-to-create-custom-index.rst @@ -226,8 +226,8 @@ custom index to a Dataset or DataArray, e.g., using the ``RasterIndex`` above: # Build a RasterIndex from the 'x' and 'y' coordinates # Xarray creates default indexes for the 'x' and 'y' coordinates - # Use drop_existing=True to replace them with a custom index - da_raster = da.set_xindex(["x", "y"], RasterIndex, drop_existing=True) + # this will automatically drop those indexes + da_raster = da.set_xindex(["x", "y"], RasterIndex) # RasterIndex now takes care of label-based selection selected = 
da_raster.sel(x=10, y=slice(20, 50)) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 111603f4167..1c463e885fc 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -14,9 +14,9 @@ v2025.12.1 (unreleased) New Features ~~~~~~~~~~~~ -- Added ``drop_existing`` parameter to :py:meth:`Dataset.set_xindex` and - :py:meth:`DataArray.set_xindex` to allow replacing existing indexes without - needing to call :py:meth:`drop_indexes` first (:pull:`11008`). +- :py:meth:`Dataset.set_xindex` and :py:meth:`DataArray.set_xindex` + automatically replace any existing index being set instead of erroring + or needing to call :py:meth:`drop_indexes` first (:pull:`11008`). By `Ian Hunt-Isaak `_. Breaking Changes diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index d1fc3796421..80f7cb6d011 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2866,11 +2866,10 @@ def set_xindex( self, coord_names: str | Sequence[Hashable], index_cls: type[Index] | None = None, - drop_existing: bool = False, **options, ) -> Self: """Set a new, Xarray-compatible index from one or more existing - coordinate(s). + coordinate(s). Existing index(es) on the coord(s) will be replaced. Parameters ---------- @@ -2880,9 +2879,6 @@ def set_xindex( index_cls : subclass of :class:`~xarray.indexes.Index` The type of index to create. By default, try setting a pandas (multi-)index from the supplied coordinates. - drop_existing : bool - Whether to drop indexes on any existing coord_names if one - is present. **options Options passed to the index constructor. @@ -2892,9 +2888,7 @@ def set_xindex( Another dataarray, with this dataarray's data and with a new index. 
""" - ds = self._to_temp_dataset().set_xindex( - coord_names, index_cls, drop_existing, **options - ) + ds = self._to_temp_dataset().set_xindex(coord_names, index_cls, **options) return self._from_temp_dataset(ds) def reorder_levels( diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 9f6a32e8352..10b7070736b 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4956,11 +4956,10 @@ def set_xindex( self, coord_names: str | Sequence[Hashable], index_cls: type[Index] | None = None, - drop_existing: bool = False, **options, ) -> Self: """Set a new, Xarray-compatible index from one or more existing - coordinate(s). + coordinate(s). Existing index(es) on the coord(s) will be replaced. Parameters ---------- @@ -4971,9 +4970,6 @@ def set_xindex( The type of index to create. By default, try setting a ``PandasIndex`` if ``len(coord_names) == 1``, otherwise a ``PandasMultiIndex``. - drop_existing : bool - Whether to drop indexes on any existing coord_names if one - is present **options Options passed to the index constructor. 
@@ -5009,18 +5005,6 @@ def set_xindex( ) raise ValueError("\n".join(msg)) - # we could be more clever here (e.g., drop-in index replacement if index - # coordinates do not conflict), but let's not allow this for now - indexed_coords = set(coord_names) & set(self._indexes) - - if indexed_coords: - if drop_existing: - self.drop_indexes(indexed_coords) - else: - raise ValueError( - f"those coordinates already have an index: {indexed_coords}" - ) - coord_vars = {name: self._variables[name] for name in coord_names} index = index_cls.from_variables(coord_vars, options=options) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 5ea327f5ece..df9d29843ff 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2419,9 +2419,8 @@ def from_variables(cls, variables, options): assert indexed.xindexes["foo"].opt == 1 # type: ignore[attr-defined] def test_set_xindex_drop_existing(self) -> None: - # Basic test that drop_existing parameter is passed through to Dataset da = DataArray([1, 2, 3, 4], coords={"x": ("x", [0, 1, 2, 3])}, dims="x") - result = da.set_xindex("x", PandasIndex, drop_existing=True) + result = da.set_xindex("x", PandasIndex) assert "x" in result.xindexes def test_dataset_getitem(self) -> None: diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index e87cb45d631..4684ef2df14 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -4001,20 +4001,17 @@ class NotAnIndex: ... 
ds2.set_xindex("x", PandasIndex) def test_set_xindex_drop_existing(self) -> None: - # Test that drop_existing=True allows replacing an existing index - # (the default drop_existing=False raising ValueError is tested in test_set_xindex) ds = Dataset(coords={"x": ("x", [0, 1, 2, 3])}) # With drop_existing=True, it should succeed - result = ds.set_xindex("x", PandasIndex, drop_existing=True) + result = ds.set_xindex("x", PandasIndex) assert "x" in result.xindexes assert isinstance(result.xindexes["x"], PandasIndex) - # Test that drop_existing=True replaces with a custom index class CustomIndex(PandasIndex): pass - result_custom = ds.set_xindex("x", CustomIndex, drop_existing=True) + result_custom = ds.set_xindex("x", CustomIndex) assert "x" in result_custom.xindexes assert isinstance(result_custom.xindexes["x"], CustomIndex) From 9b7e60d22aea691fbe16bedd4f9c249a42f43fed Mon Sep 17 00:00:00 2001 From: Ian Hunt-Isaak Date: Wed, 17 Dec 2025 10:46:31 -0500 Subject: [PATCH 5/5] combine tests --- xarray/tests/test_dataset.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 4684ef2df14..17d3e25b642 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -3995,12 +3995,6 @@ class NotAnIndex: ... with pytest.raises(ValueError, match="those variables are data variables"): ds.set_xindex("data_var", PandasIndex) - ds2 = Dataset(coords={"x": ("x", [0, 1, 2, 3])}) - - with pytest.raises(ValueError, match="those coordinates already have an index"): - ds2.set_xindex("x", PandasIndex) - - def test_set_xindex_drop_existing(self) -> None: ds = Dataset(coords={"x": ("x", [0, 1, 2, 3])}) # With drop_existing=True, it should succeed