Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 20 additions & 10 deletions pandas/_libs/window/aggregations.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -494,12 +494,16 @@ cdef float64_t calc_skew(int64_t minp, int64_t nobs,
int64_t num_consecutive_same_value
) noexcept nogil:
cdef:
float64_t result, dnobs
float64_t result, dnobs, m2_cutoff
float64_t moments_ratio, correction

if nobs >= minp:
dnobs = <float64_t>nobs

# Relative cutoff as introduced in #62405
# See the comment in nanops.nankurt for further explanation
m2_cutoff = ((EpsF64 * mean) ** 2) * dnobs

if nobs < 3:
result = NaN
# GH 42064 46431
Expand All @@ -512,10 +516,11 @@ cdef float64_t calc_skew(int64_t minp, int64_t nobs,
#
# in core/nanops.py nanskew/nankurt call the function
# _zero_out_fperr(m2) to fix floating error.
# if the variance is less than 1e-14, it could be
# treat as zero, here we follow the original
# skew/kurt behaviour to check m2 <= n * 1e-14
elif m2 <= dnobs * 1e-14:
# if the variance is less than a relative cutoff value
# it could be treated as zero, here we follow the original
# skew/kurt behaviour to check
# m2 <= ((float64_machine_eps * mean) ** 2) * observations
elif m2 <= m2_cutoff:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With the new implementation for skewness, it shouldn't have much of a problem with numerical instability against degenerate distributions, which is the main reason for this branch. Can you check if it's possible to remove this branch or to just compare against 0?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Indeed works without this, so removed the branch.

result = NaN
else:
moments_ratio = m3 / (m2 * sqrt(m2))
Expand Down Expand Up @@ -688,7 +693,7 @@ cdef float64_t calc_kurt(int64_t minp, int64_t nobs,
int64_t num_consecutive_same_value,
) noexcept nogil:
cdef:
float64_t result, dnobs
float64_t result, dnobs, variance_cutoff
float64_t A, B, C, D, R, K

if nobs >= minp:
Expand All @@ -708,16 +713,21 @@ cdef float64_t calc_kurt(int64_t minp, int64_t nobs,
R = R * A
D = xxxx / dnobs - R - 6 * B * A * A - 4 * C * A

# Relative cutoff as introduced in #62405
# See the comment in nanops.nankurt for further explanation
variance_cutoff = ((EpsF64 * B) ** 2) * dnobs

# #18044: with uniform distribution, floating issue will
# cause B != 0. and cause the result is a very
# large number.
#
# in core/nanops.py nanskew/nankurt call the function
# _zero_out_fperr(m2) to fix floating error.
# if the variance is less than 1e-14, it could be
# treat as zero, here we follow the original
# skew/kurt behaviour to check B <= 1e-14
if B <= 1e-14:
# if the variance is less than a relative cutoff value
# it could be treated as zero, here we follow the original
# skew/kurt behaviour to check
# m2 <= ((float64_machine_eps * mean) ** 2) * observations
if B <= variance_cutoff:
result = NaN
else:
K = (dnobs * dnobs - 1.) * D / (B * B) - 3 * ((dnobs - 1.) ** 2)
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/window/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,16 @@ def series():
return series


@pytest.fixture
def low_variance_series():
"""Make a mocked low variance series as a fixture"""
arr = np.random.default_rng(505).normal(loc=0e0, scale=1e-8, size=100)
locs = np.arange(20, 40)
arr[locs] = np.nan
series = Series(arr, index=bdate_range(datetime(2009, 1, 1), periods=100))
return series


@pytest.fixture
def frame():
"""Make mocked frame as fixture."""
Expand Down
10 changes: 10 additions & 0 deletions pandas/tests/window/test_rolling_skew_kurt.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,16 @@ def test_series(series, sp_func, roll_func):
tm.assert_almost_equal(result.iloc[-1], compare_func(series[-50:]))


@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
def test_low_variance_series(low_variance_series, sp_func, roll_func):
sp_stats = pytest.importorskip("scipy.stats")

compare_func = partial(getattr(sp_stats, sp_func), bias=False)
result = getattr(low_variance_series.rolling(50), roll_func)()
assert isinstance(result, Series)
tm.assert_almost_equal(result.iloc[-1], compare_func(low_variance_series[-50:]))


@pytest.mark.parametrize("sp_func, roll_func", [["kurtosis", "kurt"], ["skew", "skew"]])
def test_frame(raw, frame, sp_func, roll_func):
sp_stats = pytest.importorskip("scipy.stats")
Expand Down
Loading