Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 21 additions & 10 deletions xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,12 +79,16 @@ def check_reduce_dims(reduce_dims, dimensions):
if reduce_dims is not ...:
if is_scalar(reduce_dims):
reduce_dims = [reduce_dims]
if any(dim not in dimensions for dim in reduce_dims):
raise ValueError(
f"cannot reduce over dimensions {reduce_dims!r}. expected either '...' "
f"to reduce over all dimensions or one or more of {dimensions!r}. "
f"Alternatively, install the `flox` package. "
)
missing_dims = [dim for dim in reduce_dims if dim not in dimensions]
if missing_dims:
if len(missing_dims) == 1:
raise ValueError(
f"{missing_dims[0]!r} not found in array dimensions {dimensions!r}"
)
else:
raise ValueError(
f"dimensions {missing_dims!r} not found in array dimensions {dimensions!r}"
)


def _codes_to_group_indices(codes: np.ndarray, N: int) -> GroupIndices:
Expand Down Expand Up @@ -1108,10 +1112,17 @@ def _flox_reduce(
# Do this so we raise the same error message whether flox is present or not.
# Better to control it here than in flox.
for grouper in self.groupers:
if any(
d not in grouper.codes.dims and d not in obj.dims for d in parsed_dim
):
raise ValueError(f"cannot reduce over dimensions {dim}.")
all_dims = set(grouper.codes.dims) | set(obj.dims)
missing = [d for d in parsed_dim if d not in all_dims]
if missing:
if len(missing) == 1:
raise ValueError(
f"{missing[0]!r} not found in array dimensions {tuple(obj.dims)!r}"
)
else:
raise ValueError(
f"dimensions {missing!r} not found in array dimensions {tuple(obj.dims)!r}"
)

has_missing_groups = (
self.encoded.unique_coord.size != self.encoded.full_index.size
Expand Down
4 changes: 2 additions & 2 deletions xarray/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -771,10 +771,10 @@ def test_groupby_reduce_dimension_error(array) -> None:
grouped = array.groupby("y")
# assert_identical(array, grouped.mean())

with pytest.raises(ValueError, match=r"cannot reduce over dimensions"):
with pytest.raises(ValueError, match=r"not found in array dimensions"):
grouped.mean("huh")

with pytest.raises(ValueError, match=r"cannot reduce over dimensions"):
with pytest.raises(ValueError, match=r"not found in array dimensions"):
grouped.mean(("x", "y", "asd"))

assert_identical(array.mean("x"), grouped.reduce(np.mean, "x"))
Expand Down
90 changes: 90 additions & 0 deletions xarray/tests/test_issue_10875.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
"""Test for issue #10875: Clear error message when reducing over non-existent dimension."""

import numpy as np
import pytest

import xarray as xr


class TestGroupbyDimensionError:
    """Tests for clearer error messages in groupby reduce operations.

    Every test reduces a grouped array over dimension names and checks either
    the ``ValueError`` message raised for unknown names or that a valid
    reduction still succeeds.
    """

    @staticmethod
    def _make_array() -> "xr.DataArray":
        """Build the 3x3x3 array shared by all tests in this class.

        Dimensions are passed explicitly as ``("lon", "lat", "time")`` so the
        tests do not depend on dimension names being inferred from the
        ordering of the ``coords`` mapping.
        """
        return xr.DataArray(
            np.reshape(range(27), (3, 3, 3)),
            dims=["lon", "lat", "time"],
            coords=dict(
                lon=range(3),
                lat=range(3),
                time=xr.date_range("2025-10-01 00:00", "2025-10-01 02:00", freq="h"),
            ),
        )

    def test_groupby_reduce_missing_dim_single(self) -> None:
        """Groupby reduce with single missing dimension should have clear error."""
        array = self._make_array()

        with pytest.raises(
            ValueError, match=r"'longitude' not found in array dimensions"
        ):
            array.groupby("time").std(dim="longitude")

    def test_groupby_reduce_missing_dim_multiple(self) -> None:
        """Groupby reduce with multiple missing dimensions should list them."""
        array = self._make_array()

        with pytest.raises(
            ValueError, match=r"not found in array dimensions"
        ) as excinfo:
            array.groupby("time").std(dim=["longitude", "latitude"])

        # The match above only pins the message suffix; also verify that every
        # missing name is actually reported, as the docstring promises.
        message = str(excinfo.value)
        assert "longitude" in message
        assert "latitude" in message

    def test_standard_reduce_error_matches(self) -> None:
        """Standard reduce and groupby reduce should have similar error format."""
        array = self._make_array()

        # pytest.raises fails the test by itself when no ValueError is raised,
        # so no manual "was an exception raised?" bookkeeping is needed.
        with pytest.raises(ValueError) as standard_info:
            array.std(dim="longitude")
        with pytest.raises(ValueError) as groupby_info:
            array.groupby("time").std(dim="longitude")

        for message in (str(standard_info.value), str(groupby_info.value)):
            assert "longitude" in message
            assert "not found in array dimensions" in message

    def test_groupby_reduce_valid_dim_still_works(self) -> None:
        """Ensure valid dimensions still work correctly."""
        array = self._make_array()

        result = array.groupby("time").std(dim="lon")

        # The reduced dimension must be gone; the untouched one must remain.
        assert "lon" not in result.dims
        assert "lat" in result.dims
Loading