pandas-dev · pandeconscious · Oct 23, 2025 · Oct 27, 2025 · Oct 27, 2025 · Oct 27, 2025
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -239,6 +239,7 @@ Other enhancements
 - :meth:`DataFrame.apply` supports using third-party execution engines like the Bodo.ai JIT compiler (:issue:`60668`)
 - :meth:`DataFrame.iloc` and :meth:`Series.iloc` now support boolean masks in ``__getitem__`` for more consistent indexing behavior (:issue:`60994`)
 - :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support f-strings (e.g., ``"{:.6f}"``) for the ``float_format`` parameter, in addition to the ``%`` format strings and callables (:issue:`49580`)
+- :meth:`Series.corr`, :meth:`DataFrame.corr`, and :meth:`DataFrame.corrwith` now support ordered categorical data when ``method="kendall"`` or ``method="spearman"`` (:issue:`60306`)
 - :meth:`Series.map` can now accept kwargs to pass on to func (:issue:`59814`)
 - :meth:`Series.map` now accepts an ``engine`` parameter to allow execution with a third-party execution engine (:issue:`61125`)
 - :meth:`Series.nlargest` uses stable sort internally and will preserve original ordering in the case of equality (:issue:`55767`)

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -185,6 +185,7 @@
     treat_as_nested,
 )
 from pandas.core.methods import selectn
+from pandas.core.methods.corr import transform_ord_cat_cols_to_coded_cols
 from pandas.core.reshape.melt import melt
 from pandas.core.series import Series
 from pandas.core.shared_docs import _shared_docs
@@ -11718,6 +11719,10 @@ def corr(
         data = self._get_numeric_data() if numeric_only else self
         cols = data.columns
         idx = cols.copy()
+
+        if method in ("spearman", "kendall"):
+            data = transform_ord_cat_cols_to_coded_cols(data)
+
         mat = data.to_numpy(dtype=float, na_value=np.nan, copy=False)
 
         if method == "pearson":
@@ -12007,6 +12012,8 @@ def corrwith(
             correl = num / dom
 
         elif method in ["kendall", "spearman"] or callable(method):
+            left = transform_ord_cat_cols_to_coded_cols(left)
+            right = transform_ord_cat_cols_to_coded_cols(right)
 
             def c(x):
                 return nanops.nancorr(x[0], x[1], method=method)

diff --git a/pandas/core/methods/corr.py b/pandas/core/methods/corr.py
@@ -0,0 +1,32 @@
+"""
+Helper functions for the correlation-related methods.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+import numpy as np
+
+from pandas.core.dtypes.dtypes import CategoricalDtype
+
+if TYPE_CHECKING:
+    from pandas import DataFrame
+
+
+def transform_ord_cat_cols_to_coded_cols(df: DataFrame) -> DataFrame:
+    """
+    Replace ordered categoricals with their codes, making a shallow copy if necessary.
+    """
+
+    result = df
+    made_copy = False
+    for idx, dtype in enumerate(df.dtypes):
+        if not isinstance(dtype, CategoricalDtype) or not dtype.ordered:
+            continue
+        col = result._ixs(idx, axis=1)
+        if not made_copy:
+            made_copy = True
+            result = result.copy(deep=False)
+        result._iset_item(idx, col.cat.codes.replace(-1, np.nan))
+    return result
diff --git a/pandas/core/series.py b/pandas/core/series.py
@@ -2750,6 +2750,12 @@ def corr(
         if len(this) == 0:
             return np.nan
 
+        if method in ("spearman", "kendall"):
+            if this.dtype == "category" and this.cat.ordered:
+                this = this.cat.codes.replace(-1, np.nan)
+            if other.dtype == "category" and other.cat.ordered:
+                other = other.cat.codes.replace(-1, np.nan)
+
         this_values = this.to_numpy(dtype=float, na_value=np.nan, copy=False)
         other_values = other.to_numpy(dtype=float, na_value=np.nan, copy=False)
 

diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py
@@ -252,6 +252,59 @@ def test_corr_numeric_only(self, meth, numeric_only):
             with pytest.raises(ValueError, match="could not convert string to float"):
                 df.corr(meth, numeric_only=numeric_only)
 
+    @pytest.mark.parametrize("method", ["kendall", "spearman"])
+    @pytest.mark.parametrize("col1", ["ord_cat", "ord_cat_none", "ord_cat_shuff"])
+    @pytest.mark.parametrize("col2", ["ord_cat", "ord_cat_none", "ord_cat_shuff"])
+    @td.skip_if_no("scipy")
+    def test_corr_rank_ordered_categorical(self, method, col1, col2):
+        # GH #60306
+        df = DataFrame(
+            {
+                "ord_cat": pd.Categorical(
+                    ["low", "m", "h", "vh"],
+                    categories=["low", "m", "h", "vh"],
+                    ordered=True,
+                ),
+                "ord_cat_none": pd.Categorical(
+                    ["low", "m", "h", None],
+                    categories=["low", "m", "h"],
+                    ordered=True,
+                ),
+                "ord_cat_shuff": pd.Categorical(
+                    ["m", "h", "vh", "low"],
+                    categories=["low", "m", "h", "vh"],
+                    ordered=True,
+                ),
+            }
+        )
+        corr_calc = df.corr(method=method)
+        corr_expected = df[col1].corr(df[col2], method=method)
+        tm.assert_almost_equal(corr_calc[col1][col2], corr_expected)
+
+    @pytest.mark.parametrize("method", ["kendall", "spearman"])
+    @pytest.mark.parametrize("col1_idx", [0, 1, 2, 3, 4])
+    @pytest.mark.parametrize("col2_idx", [0, 1, 2, 3, 4])
+    @td.skip_if_no("scipy")
+    def test_corr_rank_ordered_categorical_duplicate_columns(
+        self, method, col1_idx, col2_idx
+    ):
+        # GH #60306
+        cat = pd.CategoricalDtype(categories=[4, 3, 2, 1], ordered=True)
+        df = DataFrame(
+            {
+                "a": pd.array([1, 2, 3, 4], dtype=cat),
+                "b": pd.array([4, 3, 2, 1], dtype=cat),
+                "c": [4, 3, 2, 1],
+                "d": [10, 20, 30, 40],
+                "e": [100, 200, 300, 400],
+            }
+        )
+        df.columns = ["a", "a", "c", "c", "e"]
+
+        corr_calc = df.corr(method=method)
+        corr_expected = df.iloc[:, col1_idx].corr(df.iloc[:, col2_idx], method=method)
+        tm.assert_almost_equal(corr_calc.iloc[col1_idx, col2_idx], corr_expected)
+
 
 class TestDataFrameCorrWith:
     @pytest.mark.parametrize(
@@ -493,3 +546,42 @@ def test_cov_with_missing_values(self):
         result2 = df.dropna().cov()
         tm.assert_frame_equal(result1, expected)
         tm.assert_frame_equal(result2, expected)
+
+    @pytest.mark.parametrize("method", ["kendall", "spearman"])
+    @pytest.mark.parametrize("col", ["a", "b", "c", "d"])
+    def test_corr_rank_ordered_categorical(self, method, col):
+        # GH #60306
+        pytest.importorskip("scipy")
+        df1 = DataFrame(
+            {
+                "a": pd.Categorical(
+                    ["low", "m", "h", "vh"],
+                    categories=["low", "m", "h", "vh"],
+                    ordered=True,
+                ),
+                "b": pd.Categorical(
+                    ["low", "m", "h", None],
+                    categories=["low", "m", "h"],
+                    ordered=True,
+                ),
+                "c": [0, 1, 2, 3],
+                "d": [2.0, 3.0, 4.5, 6.5],
+            }
+        )
+
+        df2 = DataFrame(
+            {
+                "a": [2.0, 3.0, 4.5, np.nan],
+                "b": pd.Categorical(
+                    ["m", "h", "vh", "low"],
+                    categories=["low", "m", "h", "vh"],
+                    ordered=True,
+                ),
+                "c": [2, 3, 0, 1],
+                "d": [2.0, 3.0, 4.5, 6.5],
+            }
+        )
+
+        corr_calc = df1.corrwith(df2, method=method)
+        corr_expected = df1[col].corr(df2[col], method=method)
+        tm.assert_almost_equal(corr_calc.get(col), corr_expected)
diff --git a/pandas/tests/methods/corr.py b/pandas/tests/methods/corr.py
@@ -0,0 +1,132 @@
+"""
+Tests for core/methods/corr.py
+"""
+
+import numpy as np
+import pytest
+
+from pandas import (
+    Categorical,
+    DataFrame,
+    Series,
+)
+import pandas._testing as tm
+from pandas.core.methods.corr import transform_ord_cat_cols_to_coded_cols
+
+
+@pytest.mark.parametrize(
+    ("input_df_dict", "expected_df_dict"),
+    [
+        pytest.param(
+            # 1) Simple: two ordered categorical columns (with and without None)
+            {
+                "ord_cat": Categorical(
+                    ["low", "m", "h", "vh"],
+                    categories=["low", "m", "h", "vh"],
+                    ordered=True,
+                ),
+                "ord_cat_none": Categorical(
+                    ["low", "m", "h", None],
+                    categories=["low", "m", "h"],
+                    ordered=True,
+                ),
+            },
+            {
+                # codes: low=0, m=1, h=2, vh=3
+                "ord_cat": Series([0, 1, 2, 3], dtype="int8"),
+                # codes: low=0, m=1, h=2, None -> NaN
+                "ord_cat_none": [0, 1.0, 2.0, np.nan],
+            },
+            id="ordered-categoricals-basic",
+        ),
+        pytest.param(
+            # 2) Mixed dtypes: only the ordered categorical should change
+            {
+                "ordered": Categorical(
+                    ["a", "c", "b"],
+                    categories=["a", "b", "c"],
+                    ordered=True,
+                ),
+                "unordered": Categorical(["x", "y", "x"], ordered=False),
+                "num": [10, 20, 30],
+                "text": ["u", "v", "w"],
+            },
+            {
+                # codes: a=0, c=2, b=1
+                "ordered": Series([0, 2, 1], dtype="int8"),
+                # unordered categorical should be untouched (still categorical)
+                "unordered": Categorical(["x", "y", "x"], ordered=False),
+                "num": [10, 20, 30],
+                "text": ["u", "v", "w"],
+            },
+            id="mixed-types-only-ordered-changes",
+        ),
+    ],
+)
+def test_transform_ord_cat_cols_to_coded_cols(
+    input_df_dict: dict, expected_df_dict: dict
+) -> None:
+    # GH #60306
+    input_df = DataFrame(input_df_dict)
+    expected_df = DataFrame(expected_df_dict)
+    out_df = transform_ord_cat_cols_to_coded_cols(input_df)
+    assert list(out_df.columns) == list(expected_df.columns)
+    tm.assert_frame_equal(out_df, expected_df)
+
+
+@pytest.mark.parametrize(
+    ("input_df_dict", "expected_df_dict"),
+    [
+        pytest.param(
+            {
+                "dup_1": Categorical(
+                    ["low", "m", "h"],
+                    categories=["low", "m", "h"],
+                    ordered=True,
+                ),
+                "dup_2": [5, 6, 7],
+            },
+            {
+                # After transform: position 0 (ordered cat) becomes codes [0,1,2],
+                # position 1 remains untouched numbers [5,6,7].
+                "dup_1": Series([0, 1, 2], dtype="int8"),
+                "dup_2": [5, 6, 7],
+            },
+            id="duplicate-names-ordered-first",
+        ),
+        pytest.param(
+            {
+                "dup_1": ["a", "b", "c"],  # non-categorical
+                "dup_2": Categorical(
+                    ["p", "q", None],
+                    categories=["p", "q"],
+                    ordered=True,
+                ),
+                "dup_3": Categorical(
+                    ["low", "m", "h"],
+                    categories=["low", "m", "h"],
+                    ordered=True,
+                ),
+            },
+            {
+                # First stays object; second turns into codes [0, 1, NaN]
+                # and third changes into codes [0, 1, 2]
+                "dup_1": ["a", "b", "c"],
+                "dup_2": [0.0, 1.0, np.nan],
+                "dup_3": Series([0, 1, 2], dtype="int8"),
+            },
+            id="duplicate-names-ordered-and-non-categorical-and-none",
+        ),
+    ],
+)
+def test_transform_ord_cat_cols_to_coded_cols_duplicated_col(
+    input_df_dict: dict, expected_df_dict: dict
+) -> None:
+    # GH #60306
+    input_df = DataFrame(input_df_dict)
+    expected_df = DataFrame(expected_df_dict)
+    input_df.columns = ["dup" for _ in input_df.columns]
+    expected_df.columns = ["dup" for _ in expected_df.columns]
+
+    out_df = transform_ord_cat_cols_to_coded_cols(input_df)
+    tm.assert_frame_equal(out_df, expected_df)
diff --git a/pandas/tests/series/methods/test_cov_corr.py b/pandas/tests/series/methods/test_cov_corr.py
@@ -3,6 +3,8 @@
 import numpy as np
 import pytest
 
+import pandas.util._test_decorators as td
+
 import pandas as pd
 from pandas import (
     Series,
@@ -184,3 +186,45 @@ def test_corr_callable_method(self, datetime_series):
         df = pd.DataFrame([s1, s2])
         expected = pd.DataFrame([{0: 1.0, 1: 0}, {0: 0, 1: 1.0}])
         tm.assert_almost_equal(df.transpose().corr(method=my_corr), expected)
+
+    @td.skip_if_no("scipy")
+    @pytest.mark.parametrize("method", ["kendall", "spearman"])
+    @pytest.mark.parametrize(
+        "cat_series_inpt",
+        [
+            pd.Categorical(  # ordered cat series
+                ["low", "medium", "high"],
+                categories=["low", "medium", "high"],
+                ordered=True,
+            ),
+            pd.Categorical(  # ordered cat series with NA
+                ["low", "medium", "high", None],
+                categories=["low", "medium", "high"],
+                ordered=True,
+            ),
+        ],
+    )
+    @pytest.mark.parametrize(
+        "other_series_inpt",
+        [
+            pd.Categorical(  # other cat ordered series
+                ["m", "l", "h"],
+                categories=["l", "m", "h"],
+                ordered=True,
+            ),
+            # other non cat series
+            [2, 1, 3],
+        ],
+    )
+    def test_corr_rank_ordered_categorical(
+        self,
+        method,
+        cat_series_inpt,
+        other_series_inpt,
+    ):
+        # GH #60306
+        expected_corr = {"kendall": 0.33333333333333337, "spearman": 0.5}
+        cat_series = Series(cat_series_inpt)
+        other_series = Series(other_series_inpt)
+        corr_calc = cat_series.corr(other_series, method=method)
+        tm.assert_almost_equal(corr_calc, expected_corr[method])