-
-
Notifications
You must be signed in to change notification settings - Fork 19.3k
ENH: pd.NamedAgg forwards *args and **kwargs to aggfunc #62729
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 38 commits
0e66668
f390c79
bd8b131
6548960
3d48574
54aa398
5ff65d6
d999923
25fe854
173b7fb
02d4bf3
3842aa5
bf31b35
bef8a80
16337af
d960dd5
654dea4
622ca7a
bb29621
b3dd532
7ddfbad
fc338d3
601ffbd
57a4ee2
4ee036a
1900160
5e6d051
aeaab18
8440ac0
21220ff
be4e1c6
c659fd6
9f8a4c3
196d1fb
9396313
65d0d96
3826e26
93d51c1
fcbdee4
7361570
9241409
e4bdd40
99005e6
8fc814c
db9f7c9
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1745,7 +1745,13 @@ def reconstruct_func( | |
| >>> reconstruct_func("min") | ||
| (False, 'min', None, None) | ||
| """ | ||
| relabeling = func is None and is_multi_agg_with_relabel(**kwargs) | ||
| from pandas.core.groupby.generic import NamedAgg | ||
rhshadrach marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| relabeling = func is None and ( | ||
| is_multi_agg_with_relabel(**kwargs) | ||
| or any(isinstance(v, NamedAgg) for v in kwargs.values()) | ||
| ) | ||
|
|
||
| columns: tuple[str, ...] | None = None | ||
| order: npt.NDArray[np.intp] | None = None | ||
|
|
||
|
|
@@ -1766,9 +1772,22 @@ def reconstruct_func( | |
| # "Callable[..., Any] | str | list[Callable[..., Any] | str] | | ||
| # MutableMapping[Hashable, Callable[..., Any] | str | list[Callable[..., Any] | | ||
| # str]] | None") | ||
| converted_kwargs = {} | ||
| for key, val in kwargs.items(): | ||
| if isinstance(val, NamedAgg): | ||
| aggfunc = val.aggfunc | ||
| if getattr(val, "args", ()) or getattr(val, "kwargs", {}): | ||
|
||
| a = getattr(val, "args", ()) | ||
| kw = getattr(val, "kwargs", {}) | ||
| aggfunc = lambda x, func=aggfunc, a=a, kw=kw: func(x, *a, **kw) | ||
| converted_kwargs[key] = (val.column, aggfunc) | ||
| else: | ||
| converted_kwargs[key] = val | ||
|
|
||
| func, columns, order = normalize_keyword_aggregation( # type: ignore[assignment] | ||
| kwargs | ||
| converted_kwargs | ||
| ) | ||
|
|
||
| assert func is not None | ||
|
|
||
| return relabeling, func, columns, order | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -10,13 +10,13 @@ | |||||
|
|
||||||
| from collections import abc | ||||||
| from collections.abc import Callable | ||||||
| import dataclasses | ||||||
| from functools import partial | ||||||
| from textwrap import dedent | ||||||
| from typing import ( | ||||||
| TYPE_CHECKING, | ||||||
| Any, | ||||||
| Literal, | ||||||
| NamedTuple, | ||||||
| TypeAlias, | ||||||
| TypeVar, | ||||||
| cast, | ||||||
|
|
@@ -113,19 +113,20 @@ | |||||
|
|
||||||
|
|
||||||
| @set_module("pandas") | ||||||
| class NamedAgg(NamedTuple): | ||||||
rhshadrach marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
| @dataclasses.dataclass | ||||||
| class NamedAgg: | ||||||
| """ | ||||||
| Helper for column specific aggregation with control over output column names. | ||||||
|
|
||||||
| Subclass of typing.NamedTuple. | ||||||
|
|
||||||
| Parameters | ||||||
| ---------- | ||||||
| column : Hashable | ||||||
| Column label in the DataFrame to apply aggfunc. | ||||||
| aggfunc : function or str | ||||||
| Function to apply to the provided column. If string, the name of a built-in | ||||||
| pandas function. | ||||||
| *args, **kwargs : | ||||||
|
||||||
| *args, **kwargs : | |
| *args, **kwargs : Any |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think either way is fine; I just wanted to mention this could be `args: tuple[Any, ...] = ()`, since tuples are immutable.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -866,6 +866,57 @@ def test_agg_namedtuple(self): | |
| expected = df.groupby("A").agg(b=("B", "sum"), c=("B", "count")) | ||
| tm.assert_frame_equal(result, expected) | ||
|
|
||
| def n_between(self, ser, low, high, **kwargs): | ||
| return ser.between(low, high, **kwargs).sum() | ||
|
|
||
| def test_namedagg_args(self): | ||
| df = DataFrame({"A": [0, 0, 1, 1], "B": [-1, 0, 1, 2]}) | ||
rhshadrach marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| result = df.groupby("A").agg( | ||
| count_between=pd.NamedAgg("B", self.n_between, 0, 1) | ||
| ) | ||
| expected = DataFrame({"count_between": [1, 1]}, index=Index([0, 1], name="A")) | ||
| tm.assert_frame_equal(result, expected) | ||
|
|
||
| def test_namedagg_kwargs(self): | ||
| df = DataFrame({"A": [0, 0, 1, 1], "B": [-1, 0, 1, 2]}) | ||
|
|
||
| result = df.groupby("A").agg( | ||
| count_between_kw=pd.NamedAgg("B", self.n_between, 0, 1, inclusive="both") | ||
| ) | ||
| expected = DataFrame( | ||
| {"count_between_kw": [1, 1]}, index=Index([0, 1], name="A") | ||
| ) | ||
| tm.assert_frame_equal(result, expected) | ||
|
|
||
| def test_namedagg_args_and_kwargs(self): | ||
| df = DataFrame({"A": [0, 0, 1, 1], "B": [-1, 0, 1, 2]}) | ||
|
|
||
| result = df.groupby("A").agg( | ||
| count_between_mix=pd.NamedAgg( | ||
| "B", self.n_between, 0, 1, inclusive="neither" | ||
| ) | ||
| ) | ||
| expected = DataFrame( | ||
| {"count_between_mix": [0, 0]}, index=Index([0, 1], name="A") | ||
| ) | ||
| tm.assert_frame_equal(result, expected) | ||
|
|
||
| def test_multiple_named_agg_with_args_and_kwargs(self): | ||
| df = DataFrame({"A": [0, 1, 2, 3], "B": [1, 2, 3, 4]}) | ||
|
|
||
| result = df.groupby("A").agg( | ||
| n_between01=pd.NamedAgg("B", self.n_between, 0, 1), | ||
| n_between13=pd.NamedAgg("B", self.n_between, 1, 3), | ||
| n_between02=pd.NamedAgg("B", self.n_between, 0, 2), | ||
| ) | ||
| expected = df.groupby("A").agg( | ||
| n_between01=("B", lambda x: x.between(0, 1).sum()), | ||
| n_between13=("B", lambda x: x.between(0, 3).sum()), | ||
| n_between02=("B", lambda x: x.between(0, 2).sum()), | ||
| ) | ||
|
||
| tm.assert_frame_equal(result, expected) | ||
|
|
||
| def test_mangled(self): | ||
| df = DataFrame({"A": [0, 1], "B": [1, 2], "C": [3, 4]}) | ||
| result = df.groupby("A").agg(b=("B", lambda x: 0), c=("C", lambda x: 1)) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This reads to me like a bugfix (as if it already had
`*args` and `**kwargs`, but just wasn't forwarding).