@@ -332,19 +332,13 @@ cdef float64_t calc_var(
332332 int ddof,
333333 float64_t nobs,
334334 float64_t ssqdm_x,
335- int64_t num_consecutive_same_value
336335) noexcept nogil:
337336 cdef:
338337 float64_t result
339338
340339 # Variance is unchanged if no observation is added or removed
341340 if (nobs >= minp) and (nobs > ddof):
342-
343- # pathological case & repeatedly same values case
344- if nobs == 1 or num_consecutive_same_value >= nobs:
345- result = 0
346- else :
347- result = ssqdm_x / (nobs - < float64_t> ddof)
341+ result = ssqdm_x / (nobs - < float64_t> ddof)
348342 else :
349343 result = NaN
350344
@@ -357,27 +351,19 @@ cdef void add_var(
357351 float64_t * mean_x,
358352 float64_t * ssqdm_x,
359353 float64_t * compensation,
360- int64_t * num_consecutive_same_value,
361- float64_t * prev_value,
354+ bint * numerically_unstable,
362355) noexcept nogil:
363356 """ add a value from the var calc """
364357 cdef:
365358 float64_t delta, prev_mean, y, t
359+ float64_t prev_m2 = ssqdm_x[0 ]
366360
367361 # GH#21813, if msvc 2017 bug is resolved, we should be OK with != instead of `isnan`
368362 if val != val:
369363 return
370364
371365 nobs[0 ] = nobs[0 ] + 1
372366
373- # GH#42064, record num of same values to remove floating point artifacts
374- if val == prev_value[0 ]:
375- num_consecutive_same_value[0 ] += 1
376- else :
377- # reset to 1 (include current value itself)
378- num_consecutive_same_value[0 ] = 1
379- prev_value[0 ] = val
380-
381367 # Welford's method for the online variance-calculation
382368 # using Kahan summation
383369 # https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
@@ -392,17 +378,23 @@ cdef void add_var(
392378 mean_x[0 ] = 0
393379 ssqdm_x[0 ] = ssqdm_x[0 ] + (val - prev_mean) * (val - mean_x[0 ])
394380
381+ if prev_m2 * InvCondTol > ssqdm_x[0 ]:
382+ # possible catastrophic cancellation
383+ numerically_unstable[0 ] = True
384+
395385
396386cdef void remove_var(
397387 float64_t val,
398388 float64_t * nobs,
399389 float64_t * mean_x,
400390 float64_t * ssqdm_x,
401- float64_t * compensation
391+ float64_t * compensation,
392+ bint * numerically_unstable,
402393) noexcept nogil:
403394 """ remove a value from the var calc """
404395 cdef:
405396 float64_t delta, prev_mean, y, t
397+ float64_t prev_m2 = ssqdm_x[0 ]
406398 if val == val:
407399 nobs[0 ] = nobs[0 ] - 1
408400 if nobs[0 ]:
@@ -416,9 +408,14 @@ cdef void remove_var(
416408 delta = t
417409 mean_x[0 ] = mean_x[0 ] - delta / nobs[0 ]
418410 ssqdm_x[0 ] = ssqdm_x[0 ] - (val - prev_mean) * (val - mean_x[0 ])
411+
412+ if prev_m2 * InvCondTol > ssqdm_x[0 ]:
413+ # possible catastrophic cancellation
414+ numerically_unstable[0 ] = True
419415 else :
420416 mean_x[0 ] = 0
421417 ssqdm_x[0 ] = 0
418+ numerically_unstable[0 ] = False
422419
423420
424421def roll_var (const float64_t[:] values , ndarray[int64_t] start ,
@@ -428,11 +425,12 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start,
428425 """
429426 cdef:
430427 float64_t mean_x , ssqdm_x , nobs , compensation_add ,
431- float64_t compensation_remove , prev_value
432- int64_t s , e , num_consecutive_same_value
428+ float64_t compensation_remove
429+ int64_t s , e
433430 Py_ssize_t i , j , N = len (start)
434431 ndarray[float64_t] output
435432 bint is_monotonic_increasing_bounds
433+ bint requires_recompute , numerically_unstable
436434
437435 minp = max (minp, 1 )
438436 is_monotonic_increasing_bounds = is_monotonic_increasing_start_end_bounds(
@@ -449,32 +447,35 @@ def roll_var(const float64_t[:] values, ndarray[int64_t] start,
449447
450448 # Over the first window, observations can only be added
451449 # never removed
452- if i == 0 or not is_monotonic_increasing_bounds or s >= end[i - 1 ]:
453-
454- prev_value = values[s]
455- num_consecutive_same_value = 0
456-
457- mean_x = ssqdm_x = nobs = compensation_add = compensation_remove = 0
458- for j in range (s, e):
459- add_var(values[j], & nobs, & mean_x, & ssqdm_x, & compensation_add,
460- & num_consecutive_same_value, & prev_value)
461-
462- else :
450+ requires_recompute = (
451+ i == 0
452+ or not is_monotonic_increasing_bounds
453+ or s >= end[i - 1 ]
454+ )
463455
456+ if not requires_recompute:
464457 # After the first window, observations can both be added
465458 # and removed
466459
467460 # calculate deletes
468461 for j in range (start[i - 1 ], s):
469462 remove_var(values[j], & nobs, & mean_x, & ssqdm_x,
470- & compensation_remove)
463+ & compensation_remove, & numerically_unstable )
471464
472465 # calculate adds
473466 for j in range (end[i - 1 ], e):
474467 add_var(values[j], & nobs, & mean_x, & ssqdm_x, & compensation_add,
475- & num_consecutive_same_value, & prev_value)
468+ & numerically_unstable)
469+
470+ if requires_recompute or numerically_unstable:
471+
472+ mean_x = ssqdm_x = nobs = compensation_add = compensation_remove = 0
473+ for j in range (s, e):
474+ add_var(values[j], & nobs, & mean_x, & ssqdm_x, & compensation_add,
475+ & numerically_unstable)
476+ numerically_unstable = False
476477
477- output[i] = calc_var(minp, ddof, nobs, ssqdm_x, num_consecutive_same_value )
478+ output[i] = calc_var(minp, ddof, nobs, ssqdm_x)
478479
479480 if not is_monotonic_increasing_bounds:
480481 nobs = 0.0
0 commit comments