Skip to content
Merged
Show file tree
Hide file tree
Changes from 38 commits
Commits
Show all changes
45 commits
Select commit Hold shift + click to select a range
0e66668
code impl and examples
sreeja97 Oct 18, 2025
f390c79
unit tests
sreeja97 Oct 18, 2025
bd8b131
add typehint
sreeja97 Oct 18, 2025
6548960
whats new
sreeja97 Oct 18, 2025
3d48574
mypy
sreeja97 Oct 18, 2025
54aa398
mypy
sreeja97 Oct 18, 2025
5ff65d6
doc string validation
sreeja97 Oct 18, 2025
d999923
doc
sreeja97 Oct 18, 2025
25fe854
doc
sreeja97 Oct 18, 2025
173b7fb
reverting an example that was changed accidentally
sreeja97 Oct 28, 2025
02d4bf3
review comments
sreeja97 Oct 28, 2025
3842aa5
code impl and examples
sreeja97 Oct 18, 2025
bf31b35
code impl and examples
sreeja97 Oct 18, 2025
bef8a80
add typehint
sreeja97 Oct 18, 2025
16337af
mypy
sreeja97 Oct 18, 2025
d960dd5
mypy
sreeja97 Oct 18, 2025
654dea4
keep comment
sreeja97 Oct 29, 2025
622ca7a
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 29, 2025
bb29621
Merge branch 'pandas-dev:main' into aggregate
sreeja97 Oct 29, 2025
b3dd532
code impl and examples
sreeja97 Oct 18, 2025
7ddfbad
unit tests
sreeja97 Oct 18, 2025
fc338d3
add typehint
sreeja97 Oct 18, 2025
601ffbd
whats new
sreeja97 Oct 18, 2025
57a4ee2
mypy
sreeja97 Oct 18, 2025
4ee036a
mypy
sreeja97 Oct 18, 2025
1900160
doc string validation
sreeja97 Oct 18, 2025
5e6d051
doc
sreeja97 Oct 18, 2025
aeaab18
doc
sreeja97 Oct 18, 2025
8440ac0
reverting an example that was changed accidentally
sreeja97 Oct 28, 2025
21220ff
review comments
sreeja97 Oct 28, 2025
be4e1c6
code impl and examples
sreeja97 Oct 18, 2025
c659fd6
code impl and examples
sreeja97 Oct 18, 2025
9f8a4c3
add typehint
sreeja97 Oct 18, 2025
196d1fb
mypy
sreeja97 Oct 18, 2025
9396313
mypy
sreeja97 Oct 18, 2025
65d0d96
keep comment
sreeja97 Oct 29, 2025
3826e26
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 29, 2025
93d51c1
Merge branch 'aggregate' of https://github.com/sreeja97/pandas into a…
sreeja97 Oct 29, 2025
fcbdee4
Merge branch 'main' into aggregate
sreeja97 Nov 3, 2025
7361570
review comments
sreeja97 Nov 3, 2025
9241409
fix circular import error
sreeja97 Nov 3, 2025
e4bdd40
fix failing test
sreeja97 Nov 3, 2025
99005e6
Merge branch 'main' into aggregate
sreeja97 Nov 3, 2025
8fc814c
restore comment
sreeja97 Nov 4, 2025
db9f7c9
restore comment
sreeja97 Nov 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,8 @@ All warnings for upcoming changes in pandas will have the base class :class:`pan

Other enhancements
^^^^^^^^^^^^^^^^^^
- :class:`pandas.NamedAgg` now forwards any ``*args`` and ``**kwargs``
to calls of ``aggfunc`` (:issue:`58283`)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This reads to me like a bugfix (as if it already had *args and **kwargs, but just wasn't forwarding).

Suggested change
- :class:`pandas.NamedAgg` now forwards any ``*args`` and ``**kwargs``
to calls of ``aggfunc`` (:issue:`58283`)
- :class:`pandas.NamedAgg` now supports passing ``*args`` and ``**kwargs``
to calls of ``aggfunc`` (:issue:`58283`)

- :func:`pandas.merge` propagates the ``attrs`` attribute to the result if all
inputs have identical ``attrs``, as has so far already been the case for
:func:`pandas.concat`.
Expand Down
23 changes: 21 additions & 2 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -1745,7 +1745,13 @@ def reconstruct_func(
>>> reconstruct_func("min")
(False, 'min', None, None)
"""
relabeling = func is None and is_multi_agg_with_relabel(**kwargs)
from pandas.core.groupby.generic import NamedAgg

relabeling = func is None and (
is_multi_agg_with_relabel(**kwargs)
or any(isinstance(v, NamedAgg) for v in kwargs.values())
)

columns: tuple[str, ...] | None = None
order: npt.NDArray[np.intp] | None = None

Expand All @@ -1766,9 +1772,22 @@ def reconstruct_func(
# "Callable[..., Any] | str | list[Callable[..., Any] | str] |
# MutableMapping[Hashable, Callable[..., Any] | str | list[Callable[..., Any] |
# str]] | None")
converted_kwargs = {}
for key, val in kwargs.items():
if isinstance(val, NamedAgg):
aggfunc = val.aggfunc
if getattr(val, "args", ()) or getattr(val, "kwargs", {}):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In what case does val not have an args or kwargs attribute?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I removed the redundant checks

a = getattr(val, "args", ())
kw = getattr(val, "kwargs", {})
aggfunc = lambda x, func=aggfunc, a=a, kw=kw: func(x, *a, **kw)
converted_kwargs[key] = (val.column, aggfunc)
else:
converted_kwargs[key] = val

func, columns, order = normalize_keyword_aggregation( # type: ignore[assignment]
kwargs
converted_kwargs
)

assert func is not None

return relabeling, func, columns, order
Expand Down
54 changes: 49 additions & 5 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@

from collections import abc
from collections.abc import Callable
import dataclasses
from functools import partial
from textwrap import dedent
from typing import (
TYPE_CHECKING,
Any,
Literal,
NamedTuple,
TypeAlias,
TypeVar,
cast,
Expand Down Expand Up @@ -113,19 +113,20 @@


@set_module("pandas")
class NamedAgg(NamedTuple):
@dataclasses.dataclass
class NamedAgg:
"""
Helper for column specific aggregation with control over output column names.

Subclass of typing.NamedTuple.

Parameters
----------
column : Hashable
Column label in the DataFrame to apply aggfunc.
aggfunc : function or str
Function to apply to the provided column. If string, the name of a built-in
pandas function.
*args, **kwargs :
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
*args, **kwargs :
*args, **kwargs : Any

Optional positional and keyword arguments passed to ``aggfunc``.

See Also
--------
Expand All @@ -137,14 +138,57 @@ class NamedAgg(NamedTuple):
>>> agg_a = pd.NamedAgg(column="a", aggfunc="min")
>>> agg_1 = pd.NamedAgg(column=1, aggfunc=lambda x: np.mean(x))
>>> df.groupby("key").agg(result_a=agg_a, result_1=agg_1)
result_a result_1
result_a result_1
key
1 -1 10.5
2 1 12.0

>>> def n_between(ser, low, high, **kwargs):
... return ser.between(low, high, **kwargs).sum()

>>> agg_between = pd.NamedAgg("a", n_between, 0, 1)
>>> df.groupby("key").agg(count_between=agg_between)
count_between
key
1 1
2 1

>>> agg_between_kw = pd.NamedAgg("a", n_between, 0, 1, inclusive="both")
>>> df.groupby("key").agg(count_between_kw=agg_between_kw)
count_between_kw
key
1 1
2 1
"""

column: Hashable
aggfunc: AggScalar
args: tuple[Any, ...] = dataclasses.field(default_factory=tuple)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think either way is fine; I just wanted to mention that this could be `args: tuple[Any, ...] = ()`, since tuples are immutable.

kwargs: dict[str, Any] = dataclasses.field(default_factory=dict)

def __init__(
self,
column: Hashable,
aggfunc: Callable[..., Any] | str,
*args: Any,
**kwargs: Any,
) -> None:
self.column = column
self.aggfunc = aggfunc
self.args = args
self.kwargs = kwargs

def __getitem__(self, key: int) -> Any:
    """Provide backward-compatible tuple-style access."""
    # Positions 0..3 map onto the attributes in declaration order; the
    # attribute is only read once its position has matched ``key``.
    attribute_order = ("column", "aggfunc", "args", "kwargs")
    for position, attribute in enumerate(attribute_order):
        if key == position:
            return getattr(self, attribute)
    # Anything that does not compare equal to 0, 1, 2 or 3 (including
    # negative indices) is rejected.
    raise IndexError("index out of range")


@set_module("pandas.api.typing")
Expand Down
51 changes: 51 additions & 0 deletions pandas/tests/groupby/aggregate/test_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -866,6 +866,57 @@ def test_agg_namedtuple(self):
expected = df.groupby("A").agg(b=("B", "sum"), c=("B", "count"))
tm.assert_frame_equal(result, expected)

def n_between(self, ser, low, high, **kwargs):
    # Aggregation helper for the NamedAgg tests: count the entries of ``ser``
    # for which ``ser.between(low, high, **kwargs)`` is true.
    within_bounds = ser.between(low, high, **kwargs)
    return within_bounds.sum()

def test_namedagg_args(self):
    # Extra positional arguments given to NamedAgg (the bounds 0 and 1)
    # are expected to be passed on to the aggregation function.
    frame = DataFrame({"A": [0, 0, 1, 1], "B": [-1, 0, 1, 2]})
    spec = pd.NamedAgg("B", self.n_between, 0, 1)
    expected = DataFrame({"count_between": [1, 1]}, index=Index([0, 1], name="A"))

    result = frame.groupby("A").agg(count_between=spec)

    tm.assert_frame_equal(result, expected)

def test_namedagg_kwargs(self):
    # A keyword argument given to NamedAgg (``inclusive="both"``) is expected
    # to be passed on to the aggregation function alongside the bounds.
    frame = DataFrame({"A": [0, 0, 1, 1], "B": [-1, 0, 1, 2]})
    spec = pd.NamedAgg("B", self.n_between, 0, 1, inclusive="both")
    expected = DataFrame(
        {"count_between_kw": [1, 1]}, index=Index([0, 1], name="A")
    )

    result = frame.groupby("A").agg(count_between_kw=spec)

    tm.assert_frame_equal(result, expected)

def test_namedagg_args_and_kwargs(self):
    # Positional bounds combined with ``inclusive="neither"``: no value of B
    # lies strictly between 0 and 1, so every group is expected to count 0.
    frame = DataFrame({"A": [0, 0, 1, 1], "B": [-1, 0, 1, 2]})
    spec = pd.NamedAgg("B", self.n_between, 0, 1, inclusive="neither")
    expected = DataFrame(
        {"count_between_mix": [0, 0]}, index=Index([0, 1], name="A")
    )

    result = frame.groupby("A").agg(count_between_mix=spec)

    tm.assert_frame_equal(result, expected)

def test_multiple_named_agg_with_args_and_kwargs(self):
    # Several NamedAgg entries on the same column, each with its own bounds;
    # every output column must reflect the bounds of its own entry.
    df = DataFrame({"A": [0, 1, 2, 3], "B": [1, 2, 3, 4]})

    result = df.groupby("A").agg(
        n_between01=pd.NamedAgg("B", self.n_between, 0, 1),
        n_between13=pd.NamedAgg("B", self.n_between, 1, 3),
        n_between02=pd.NamedAgg("B", self.n_between, 0, 2),
    )
    # Expected counts are spelled out literally rather than recomputed through
    # groupby/agg, so the check does not depend on the code path under test.
    # Each group holds a single B value (1, 2, 3, 4 for A = 0, 1, 2, 3).
    expected = DataFrame(
        {
            "n_between01": [1, 0, 0, 0],
            "n_between13": [1, 1, 1, 0],
            "n_between02": [1, 1, 0, 0],
        },
        index=Index([0, 1, 2, 3], name="A"),
    )
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you specify `expected` here explicitly?

tm.assert_frame_equal(result, expected)

def test_mangled(self):
df = DataFrame({"A": [0, 1], "B": [1, 2], "C": [3, 4]})
result = df.groupby("A").agg(b=("B", lambda x: 0), c=("C", lambda x: 1))
Expand Down
Loading