diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index bce048048da..a0adfe3769c 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -7202,11 +7202,23 @@ def to_pandas(self) -> pd.Series | pd.DataFrame: def _to_dataframe(self, ordered_dims: Mapping[Any, int]): from xarray.core.extension_array import PandasExtensionArray - - # All and only non-index arrays (whether data or coordinates) should - # become columns in the output DataFrame. Excluding indexes rather - # than dims handles the case of a MultiIndex along a single dimension. - columns_in_order = [k for k in self.variables if k not in self.xindexes] + from xarray.core.indexes import PandasIndex, PandasMultiIndex + + # All non-index variables become columns. An index is excluded when it is: + # 1. a PandasMultiIndex component (e.g., A, B from a MultiIndex), + # 2. a PandasIndex whose name matches its dim (e.g., 'x' indexing dim 'x'), or + # 3. any index whose name matches a dimension. + # This allows PandasIndex coords created via set_xindex with a different + # name (e.g., 'pf' indexing dim 'pos') to be included as columns. 
+ indexes_to_exclude = set() + for name, idx in self.xindexes.items(): + if ( + isinstance(idx, PandasMultiIndex) + or (isinstance(idx, PandasIndex) and name == idx.dim) + or name in self.dims + ): + indexes_to_exclude.add(name) + columns_in_order = [k for k in self.variables if k not in indexes_to_exclude] non_extension_array_columns = [ k for k in columns_in_order diff --git a/xarray/tests/test_issue_10851.py b/xarray/tests/test_issue_10851.py new file mode 100644 index 00000000000..dae48cde1d1 --- /dev/null +++ b/xarray/tests/test_issue_10851.py @@ -0,0 +1,52 @@ +"""Test for issue #10851: Dataset Index not included in to_dataframe when name differs from dimension.""" + +import numpy as np +import pandas as pd + +import xarray as xr + + +class TestToDataFrameIndexColumn: + """Tests for to_dataframe including index coordinates with different names.""" + + def test_to_dataframe_includes_index_with_different_name(self): + """Index coordinates with name different from dimension should be in columns.""" + ds_temp = xr.Dataset( + data_vars=dict(temp=(["time", "pos"], np.array([[5, 10, 15, 20, 25]]))), + coords=dict( + pf=("pos", [1.0, 2.0, 4.2, 8.0, 10.0]), + time=("time", [pd.to_datetime("2025-01-01")]), + ), + ).set_xindex("pf") + + df = ds_temp.to_dataframe() + + assert "pf" in df.columns + assert "temp" in df.columns + np.testing.assert_array_equal(df["pf"].values, [1.0, 2.0, 4.2, 8.0, 10.0]) + + def test_to_dataframe_still_excludes_matching_dim_index(self): + """Index coordinates where name matches dimension should not be in columns.""" + ds = xr.Dataset( + data_vars=dict(temp=(["x"], [1, 2, 3])), + coords=dict(x=("x", [10, 20, 30])), + ) + + df = ds.to_dataframe() + + assert "temp" in df.columns + assert "x" not in df.columns + + def test_to_dataframe_roundtrip_with_set_xindex(self): + """A coord indexed via set_xindex should appear as a DataFrame column.""" + ds = xr.Dataset( + data_vars=dict(val=(["dim"], [100, 200, 300])), + coords=dict(coord_idx=("dim", ["a", 
"b", "c"])), + ).set_xindex("coord_idx") + + df = ds.to_dataframe() + + assert "coord_idx" in df.columns + assert "val" in df.columns + assert list(df["coord_idx"]) == ["a", "b", "c"] + assert list(df["val"]) == [100, 200, 300]