# Patch summary
# -------------
# NOTE(review): these '#' lines sit before the first "diff --git" header;
# patch tools are expected to skip such leading text -- confirm for your tool.
#
# xarray/core/groupby.py
#   * check_reduce_dims: instead of one generic "cannot reduce over
#     dimensions ..." ValueError, collect the requested dimensions that are not
#     in `dimensions` and raise a ValueError that names them
#     ("<dim> not found in array dimensions <dimensions>" for one missing
#     dimension, "dimensions [...] not found in array dimensions ..." for
#     several).  The old hint about '...' and installing `flox` is dropped.
#   * _flox_reduce: same message format.  A dimension is accepted when it is in
#     grouper.codes.dims OR obj.dims (unchanged condition, now expressed as a
#     set union), evaluated once per grouper.
#     NOTE(review): the message prints only tuple(obj.dims), not the union
#     that was actually checked.
#
# xarray/tests/test_groupby.py
#   * test_groupby_reduce_dimension_error: the two `match=` patterns follow the
#     new wording.
#
# xarray/tests/test_issue_10875.py (new file)
#   * TestGroupbyDimensionError: four tests -- single missing dimension,
#     several missing dimensions, wording shared between DataArray.std and
#     groupby(...).std, and a valid-dimension case.
#     NOTE(review): the same DataArray is built in every test, and
#     test_standard_reduce_error_matches captures the messages with try/except
#     rather than pytest.raises.
#
diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py
index 827c0a3588f..06dc5477f38 100644
--- a/xarray/core/groupby.py
+++ b/xarray/core/groupby.py
@@ -79,12 +79,16 @@ def check_reduce_dims(reduce_dims, dimensions):
     if reduce_dims is not ...:
         if is_scalar(reduce_dims):
             reduce_dims = [reduce_dims]
-        if any(dim not in dimensions for dim in reduce_dims):
-            raise ValueError(
-                f"cannot reduce over dimensions {reduce_dims!r}. expected either '...' "
-                f"to reduce over all dimensions or one or more of {dimensions!r}. "
-                f"Alternatively, install the `flox` package. "
-            )
+        missing_dims = [dim for dim in reduce_dims if dim not in dimensions]
+        if missing_dims:
+            if len(missing_dims) == 1:
+                raise ValueError(
+                    f"{missing_dims[0]!r} not found in array dimensions {dimensions!r}"
+                )
+            else:
+                raise ValueError(
+                    f"dimensions {missing_dims!r} not found in array dimensions {dimensions!r}"
+                )
 
 
 def _codes_to_group_indices(codes: np.ndarray, N: int) -> GroupIndices:
@@ -1108,10 +1112,17 @@ def _flox_reduce(
         # Do this so we raise the same error message whether flox is present or not.
         # Better to control it here than in flox.
         for grouper in self.groupers:
-            if any(
-                d not in grouper.codes.dims and d not in obj.dims for d in parsed_dim
-            ):
-                raise ValueError(f"cannot reduce over dimensions {dim}.")
+            all_dims = set(grouper.codes.dims) | set(obj.dims)
+            missing = [d for d in parsed_dim if d not in all_dims]
+            if missing:
+                if len(missing) == 1:
+                    raise ValueError(
+                        f"{missing[0]!r} not found in array dimensions {tuple(obj.dims)!r}"
+                    )
+                else:
+                    raise ValueError(
+                        f"dimensions {missing!r} not found in array dimensions {tuple(obj.dims)!r}"
+                    )
 
         has_missing_groups = (
             self.encoded.unique_coord.size != self.encoded.full_index.size
diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py
index 47ea2fcd2b0..231b446739f 100644
--- a/xarray/tests/test_groupby.py
+++ b/xarray/tests/test_groupby.py
@@ -771,10 +771,10 @@ def test_groupby_reduce_dimension_error(array) -> None:
     grouped = array.groupby("y")
     # assert_identical(array, grouped.mean())
 
-    with pytest.raises(ValueError, match=r"cannot reduce over dimensions"):
+    with pytest.raises(ValueError, match=r"not found in array dimensions"):
         grouped.mean("huh")
 
-    with pytest.raises(ValueError, match=r"cannot reduce over dimensions"):
+    with pytest.raises(ValueError, match=r"not found in array dimensions"):
         grouped.mean(("x", "y", "asd"))
 
     assert_identical(array.mean("x"), grouped.reduce(np.mean, "x"))
diff --git a/xarray/tests/test_issue_10875.py b/xarray/tests/test_issue_10875.py
new file mode 100644
index 00000000000..5fc4d68116a
--- /dev/null
+++ b/xarray/tests/test_issue_10875.py
@@ -0,0 +1,90 @@
+"""Test for issue #10875: Clear error message when reducing over non-existent dimension."""
+
+import numpy as np
+import pytest
+
+import xarray as xr
+
+
+class TestGroupbyDimensionError:
+    """Tests for clearer error messages in groupby reduce operations."""
+
+    def test_groupby_reduce_missing_dim_single(self):
+        """Groupby reduce with single missing dimension should have clear error."""
+        ds = xr.DataArray(
+            np.reshape(range(27), (3, 3, 3)),
+            coords=dict(
+                lon=range(3),
+                lat=range(3),
+                time=xr.date_range("2025-10-01 00:00", "2025-10-01 02:00", freq="h"),
+            ),
+        )
+
+        with pytest.raises(
+            ValueError, match=r"'longitude' not found in array dimensions"
+        ):
+            ds.groupby("time").std(dim="longitude")
+
+    def test_groupby_reduce_missing_dim_multiple(self):
+        """Groupby reduce with multiple missing dimensions should list them."""
+        ds = xr.DataArray(
+            np.reshape(range(27), (3, 3, 3)),
+            coords=dict(
+                lon=range(3),
+                lat=range(3),
+                time=xr.date_range("2025-10-01 00:00", "2025-10-01 02:00", freq="h"),
+            ),
+        )
+
+        with pytest.raises(ValueError, match=r"not found in array dimensions"):
+            ds.groupby("time").std(dim=["longitude", "latitude"])
+
+    def test_standard_reduce_error_matches(self):
+        """Standard reduce and groupby reduce should have similar error format."""
+        ds = xr.DataArray(
+            np.reshape(range(27), (3, 3, 3)),
+            coords=dict(
+                lon=range(3),
+                lat=range(3),
+                time=xr.date_range("2025-10-01 00:00", "2025-10-01 02:00", freq="h"),
+            ),
+        )
+
+        standard_error_msg = None
+        try:
+            ds.std(dim="longitude")
+        except ValueError as e:
+            standard_error_msg = str(e)
+
+        groupby_error_msg = None
+        try:
+            ds.groupby("time").std(dim="longitude")
+        except ValueError as e:
+            groupby_error_msg = str(e)
+
+        assert standard_error_msg is not None, (
+            "Expected ValueError from ds.std(dim='longitude')"
+        )
+        assert groupby_error_msg is not None, (
+            "Expected ValueError from groupby.std(dim='longitude')"
+        )
+        assert "longitude" in standard_error_msg
+        assert "longitude" in groupby_error_msg
+        assert "not found in array dimensions" in standard_error_msg
+        assert "not found in array dimensions" in groupby_error_msg
+
+    def test_groupby_reduce_valid_dim_still_works(self):
+        """Ensure valid dimensions still work correctly."""
+        ds = xr.DataArray(
+            np.reshape(range(27), (3, 3, 3)),
+            dims=["lon", "lat", "time"],
+            coords=dict(
+                lon=range(3),
+                lat=range(3),
+                time=xr.date_range("2025-10-01 00:00", "2025-10-01 02:00", freq="h"),
+            ),
+        )
+
+        result = ds.groupby("time").std(dim="lon")
+        assert result is not None
+        assert "lon" not in result.dims