Skip to content

Commit 7bf6660

Browse files
authored
BUG: groupby raises on non-C-contiguous masks (#63148)
1 parent 49f4a94 commit 7bf6660

File tree

3 files changed

+33
-13
lines changed

3 files changed

+33
-13
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1267,6 +1267,7 @@ Groupby/resample/rolling
12671267
- Bug in :meth:`Series.resample` could raise when the date range ended shortly before a non-existent time. (:issue:`58380`)
12681268
- Bug in :meth:`Series.resample` raising error when resampling non-nanosecond resolutions out of bounds for nanosecond precision (:issue:`57427`)
12691269
- Bug in :meth:`Series.rolling.var` and :meth:`Series.rolling.std` computing incorrect results due to numerical instability. (:issue:`47721`, :issue:`52407`, :issue:`54518`, :issue:`55343`)
1270+
- Bug in :meth:`DataFrame.groupby` methods raising on NumPy-nullable data when the NA mask was not C-contiguous (:issue:`61031`)
12701271

12711272
Reshaping
12721273
^^^^^^^^^

pandas/_libs/groupby.pyx

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -819,7 +819,7 @@ def group_prod(
819819
int64_t[::1] counts,
820820
ndarray[int64float_t, ndim=2] values,
821821
const intp_t[::1] labels,
822-
const uint8_t[:, ::1] mask,
822+
const uint8_t[:, :] mask,
823823
uint8_t[:, ::1] result_mask=None,
824824
Py_ssize_t min_count=0,
825825
bint skipna=True,
@@ -893,7 +893,7 @@ def group_var(
893893
const intp_t[::1] labels,
894894
Py_ssize_t min_count=-1,
895895
int64_t ddof=1,
896-
const uint8_t[:, ::1] mask=None,
896+
const uint8_t[:, :] mask=None,
897897
uint8_t[:, ::1] result_mask=None,
898898
bint is_datetimelike=False,
899899
str name="var",
@@ -998,7 +998,7 @@ def group_skew(
998998
int64_t[::1] counts,
999999
ndarray[float64_t, ndim=2] values,
10001000
const intp_t[::1] labels,
1001-
const uint8_t[:, ::1] mask=None,
1001+
const uint8_t[:, :] mask=None,
10021002
uint8_t[:, ::1] result_mask=None,
10031003
bint skipna=True,
10041004
) -> None:
@@ -1086,7 +1086,7 @@ def group_kurt(
10861086
int64_t[::1] counts,
10871087
ndarray[float64_t, ndim=2] values,
10881088
const intp_t[::1] labels,
1089-
const uint8_t[:, ::1] mask=None,
1089+
const uint8_t[:, :] mask=None,
10901090
uint8_t[:, ::1] result_mask=None,
10911091
bint skipna=True,
10921092
) -> None:
@@ -1180,7 +1180,7 @@ def group_mean(
11801180
const intp_t[::1] labels,
11811181
Py_ssize_t min_count=-1,
11821182
bint is_datetimelike=False,
1183-
const uint8_t[:, ::1] mask=None,
1183+
const uint8_t[:, :] mask=None,
11841184
uint8_t[:, ::1] result_mask=None,
11851185
bint skipna=True,
11861186
) -> None:
@@ -1324,7 +1324,7 @@ def group_ohlc(
13241324
ndarray[int64float_t, ndim=2] values,
13251325
const intp_t[::1] labels,
13261326
Py_ssize_t min_count=-1,
1327-
const uint8_t[:, ::1] mask=None,
1327+
const uint8_t[:, :] mask=None,
13281328
uint8_t[:, ::1] result_mask=None,
13291329
) -> None:
13301330
"""
@@ -1870,7 +1870,7 @@ cdef group_min_max(
18701870
Py_ssize_t min_count=-1,
18711871
bint is_datetimelike=False,
18721872
bint compute_max=True,
1873-
const uint8_t[:, ::1] mask=None,
1873+
const uint8_t[:, :] mask=None,
18741874
uint8_t[:, ::1] result_mask=None,
18751875
bint skipna=True,
18761876
):
@@ -1983,7 +1983,7 @@ def group_idxmin_idxmax(
19831983
const intp_t[::1] labels,
19841984
Py_ssize_t min_count=-1,
19851985
bint is_datetimelike=False,
1986-
const uint8_t[:, ::1] mask=None,
1986+
const uint8_t[:, :] mask=None,
19871987
str name="idxmin",
19881988
bint skipna=True,
19891989
uint8_t[:, ::1] result_mask=None,
@@ -2096,7 +2096,7 @@ def group_max(
20962096
const intp_t[::1] labels,
20972097
Py_ssize_t min_count=-1,
20982098
bint is_datetimelike=False,
2099-
const uint8_t[:, ::1] mask=None,
2099+
const uint8_t[:, :] mask=None,
21002100
uint8_t[:, ::1] result_mask=None,
21012101
bint skipna=True,
21022102
) -> None:
@@ -2124,7 +2124,7 @@ def group_min(
21242124
const intp_t[::1] labels,
21252125
Py_ssize_t min_count=-1,
21262126
bint is_datetimelike=False,
2127-
const uint8_t[:, ::1] mask=None,
2127+
const uint8_t[:, :] mask=None,
21282128
uint8_t[:, ::1] result_mask=None,
21292129
bint skipna=True,
21302130
) -> None:
@@ -2148,7 +2148,7 @@ def group_min(
21482148
cdef group_cummin_max(
21492149
numeric_t[:, ::1] out,
21502150
ndarray[numeric_t, ndim=2] values,
2151-
const uint8_t[:, ::1] mask,
2151+
const uint8_t[:, :] mask,
21522152
uint8_t[:, ::1] result_mask,
21532153
const intp_t[::1] labels,
21542154
int ngroups,
@@ -2264,7 +2264,7 @@ def group_cummin(
22642264
const intp_t[::1] labels,
22652265
int ngroups,
22662266
bint is_datetimelike,
2267-
const uint8_t[:, ::1] mask=None,
2267+
const uint8_t[:, :] mask=None,
22682268
uint8_t[:, ::1] result_mask=None,
22692269
bint skipna=True,
22702270
) -> None:
@@ -2290,7 +2290,7 @@ def group_cummax(
22902290
const intp_t[::1] labels,
22912291
int ngroups,
22922292
bint is_datetimelike,
2293-
const uint8_t[:, ::1] mask=None,
2293+
const uint8_t[:, :] mask=None,
22942294
uint8_t[:, ::1] result_mask=None,
22952295
bint skipna=True,
22962296
) -> None:

pandas/tests/groupby/test_all_methods.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,3 +84,22 @@ def test_dup_labels_output_shape(groupby_func, idx):
8484

8585
assert result.shape == (1, 2)
8686
tm.assert_index_equal(result.columns, idx)
87+
88+
89+
def test_not_c_contiguous_mask(groupby_func):
    """Check that groupby methods accept a non-C-contiguous NA mask.

    Two ``Int64`` frames holding the same rows under the same index labels
    are grouped by column ``"a"``: one built directly, the other built
    back-to-front and then flipped with a ``[::-1]`` slice. The flipped
    frame's mask for column ``"b"`` is asserted to be non-C-contiguous, and
    the groupby result on it must equal the result on the plain frame.

    Parameters
    ----------
    groupby_func : str
        Name of the groupby method to call on both groupby objects.
    """
    # https://github.com/pandas-dev/pandas/issues/61031
    if groupby_func == "corrwith":
        # corrwith is deprecated
        return
    df = DataFrame({"a": [1, 1, 2], "b": [3, 4, 5]}, dtype="Int64")
    # Same rows and index labels as ``df`` once flipped by the negative-step
    # slice. Named ``reversed_df`` so the builtin ``reversed`` is not shadowed.
    reversed_df = DataFrame(
        {"a": [2, 1, 1], "b": [5, 4, 3]}, dtype="Int64", index=[2, 1, 0]
    )[::-1]
    # Guard the precondition: the test is only meaningful if the mask really
    # is non-C-contiguous after the slice.
    assert not reversed_df["b"].array._mask.flags["C_CONTIGUOUS"]
    args = get_groupby_method_args(groupby_func, df)

    gb_reversed = reversed_df.groupby("a")
    result = getattr(gb_reversed, groupby_func)(*args)
    gb = df.groupby("a")
    expected = getattr(gb, groupby_func)(*args)
    tm.assert_equal(result, expected)

0 commit comments

Comments
 (0)