Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit 5b14c70

Browse files
Overload df.rolling.quantile() (#481)
* Overload df.rolling.quantile()
* Add perf test for df.rolling.quantile()

Co-authored-by: Alexander Kalistratov <alexander.kalistratov@intel.com>
1 parent 5bf6a6e commit 5b14c70

4 files changed

Lines changed: 167 additions & 44 deletions

File tree

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# *****************************************************************************
2+
# Copyright (c) 2020, Intel Corporation All rights reserved.
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are met:
6+
#
7+
# Redistributions of source code must retain the above copyright notice,
8+
# this list of conditions and the following disclaimer.
9+
#
10+
# Redistributions in binary form must reproduce the above copyright notice,
11+
# this list of conditions and the following disclaimer in the documentation
12+
# and/or other materials provided with the distribution.
13+
#
14+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
16+
# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
18+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
21+
# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22+
# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
23+
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
24+
# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25+
# *****************************************************************************
26+
27+
import pandas as pd
28+
from numba import njit
29+
30+
31+
@njit
def df_rolling_quantile():
    # Two-column frame; column 'B' is the element-wise negation of column 'A'.
    frame = pd.DataFrame({'A': [4, 3, 5, 2, 6], 'B': [-4, -3, -5, -2, -6]})

    # Rolling window of 3 rows, 0.25 quantile of each window.
    # Expect DataFrame of
    # {'A': [NaN, NaN, 3.5, 2.5, 3.5], 'B': [NaN, NaN, -4.5, -4.0, -5.5]}
    return frame.rolling(3).quantile(0.25)


print(df_rolling_quantile())

sdc/datatypes/hpat_pandas_dataframe_rolling_functions.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@
2626
import numpy
2727
import pandas
2828

29-
from numba.types import float64, Boolean, Omitted, NoneType
29+
from numba.types import (float64, Boolean, Number, Omitted,
30+
NoneType, StringLiteral, UnicodeType)
3031
from sdc.datatypes.common_functions import TypeChecker, params2list
3132
from sdc.datatypes.hpat_pandas_dataframe_rolling_types import DataFrameRollingType
3233
from sdc.hiframes.pd_dataframe_ext import get_dataframe_data
@@ -352,6 +353,23 @@ def sdc_pandas_dataframe_rolling_min(self):
352353
return gen_df_rolling_method_impl('min', self)
353354

354355

356+
@sdc_overload_method(DataFrameRollingType, 'quantile')
def sdc_pandas_dataframe_rolling_quantile(self, quantile, interpolation='linear'):
    # Overload registered for the 'quantile' method of DataFrameRollingType.
    # Argument types are validated first; only then is an implementation
    # requested from gen_df_rolling_method_impl.
    checker = TypeChecker('Method rolling.quantile().')
    checker.check(self, DataFrameRollingType)

    quantile_is_number = isinstance(quantile, Number)
    if not quantile_is_number:
        checker.raise_exc(quantile, 'float', 'quantile')

    # `interpolation` passes when it is one of the string-like types below,
    # or when it compares equal to the plain value 'linear'.
    accepted_str_types = (Omitted, StringLiteral, UnicodeType)
    interpolation_is_str = isinstance(interpolation, accepted_str_types)
    if not interpolation_is_str and interpolation != 'linear':
        checker.raise_exc(interpolation, 'str', 'interpolation')

    method_args = ['quantile']
    # The keyword value is passed as source text, hence the embedded quotes.
    method_kws = {'interpolation': '"linear"'}
    return gen_df_rolling_method_impl('quantile', self,
                                      args=method_args, kws=method_kws)
371+
372+
355373
@sdc_overload_method(DataFrameRollingType, 'skew')
356374
def sdc_pandas_dataframe_rolling_skew(self):
357375

@@ -455,6 +473,25 @@ def sdc_pandas_dataframe_rolling_sum(self):
455473
'extra_params': ''
456474
})
457475

476+
sdc_pandas_dataframe_rolling_quantile.__doc__ = sdc_pandas_dataframe_rolling_docstring_tmpl.format(**{
477+
'method_name': 'quantile',
478+
'example_caption': 'Calculate the rolling quantile.',
479+
'limitations_block':
480+
"""
481+
Limitations
482+
-----------
483+
Supported ``interpolation`` only can be `'linear'`.
484+
DataFrame elements cannot be max/min float/integer. Otherwise SDC and Pandas results are different.
485+
""",
486+
'extra_params':
487+
"""
488+
quantile: :obj:`float`
489+
Quantile to compute. 0 <= quantile <= 1.
490+
interpolation: :obj:`str`
491+
This optional parameter specifies the interpolation method to use.
492+
"""
493+
})
494+
458495
sdc_pandas_dataframe_rolling_skew.__doc__ = sdc_pandas_dataframe_rolling_docstring_tmpl.format(**{
459496
'method_name': 'skew',
460497
'example_caption': 'Unbiased rolling skewness.',

sdc/tests/test_rolling.py

Lines changed: 84 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -706,6 +706,54 @@ def test_impl(obj, window, min_periods):
706706
ref_result = test_impl(obj, window, min_periods)
707707
assert_equal(jit_result, ref_result)
708708

709+
def _test_rolling_quantile(self, obj):
    """Check jitted rolling quantile against the plain-Python result.

    Runs ``obj.rolling(window, min_periods).quantile(q)`` both through
    ``self.jit`` and directly, for every combination of even ``window``
    (from 0 up to ``len(obj) + 2``), even ``min_periods`` below the window,
    and each quantile in ``[0, 0.25, 0.5, 0.75, 1]``. Results are compared
    with the comparator returned by ``self._get_assert_equal(obj)``.
    """
    def test_impl(obj, window, min_periods, quantile):
        return obj.rolling(window, min_periods).quantile(quantile)

    hpat_func = self.jit(test_impl)
    assert_equal = self._get_assert_equal(obj)
    quantiles = [0, 0.25, 0.5, 0.75, 1]

    max_window = len(obj) + 3
    for window in range(0, max_window, 2):
        for min_periods in range(0, window, 2):
            for q in quantiles:
                with self.subTest(obj=obj, window=window,
                                  min_periods=min_periods, quantiles=q):
                    jit_result = hpat_func(obj, window, min_periods, q)
                    ref_result = test_impl(obj, window, min_periods, q)
                    assert_equal(jit_result, ref_result)
724+
725+
def _test_rolling_quantile_exception_unsupported_types(self, obj):
    """Check that wrongly typed quantile arguments raise ``TypingError``.

    Calls the jitted ``obj.rolling(3, 2).quantile(quantile, interpolation)``
    first with a string quantile and then with ``None`` as interpolation,
    and checks that the expected message is part of the raised exception.
    """
    def test_impl(obj, quantile, interpolation):
        return obj.rolling(3, 2).quantile(quantile, interpolation)

    hpat_func = self.jit(test_impl)

    msg_tmpl = 'Method rolling.quantile(). The object {}\n given: {}\n expected: {}'

    # (quantile, interpolation) -> (argument name, reported type, expected type)
    cases = (
        (('0.5', 'linear'), ('quantile', 'unicode_type', 'float')),
        ((0.5, None), ('interpolation', 'none', 'str')),
    )
    for (quantile, interpolation), msg_parts in cases:
        with self.assertRaises(TypingError) as raises:
            hpat_func(obj, quantile, interpolation)
        expected_msg = msg_tmpl.format(*msg_parts)
        self.assertIn(expected_msg, str(raises.exception))
742+
743+
def _test_rolling_quantile_exception_unsupported_values(self, obj):
    """Check that unsupported quantile argument values raise ``ValueError``.

    Calls the jitted ``obj.rolling(3, 2).quantile(quantile, interpolation)``
    with a quantile of 2 and then with interpolation ``'lower'``, and checks
    that the expected text is part of the raised exception message.
    """
    def test_impl(obj, quantile, interpolation):
        return obj.rolling(3, 2).quantile(quantile, interpolation)

    hpat_func = self.jit(test_impl)

    # (quantile, interpolation, text expected in the error message)
    cases = (
        (2, 'linear', 'quantile value not in [0, 1]'),
        (0.5, 'lower', 'interpolation value not "linear"'),
    )
    for quantile, interpolation, expected_text in cases:
        with self.assertRaises(ValueError) as raises:
            hpat_func(obj, quantile, interpolation)
        self.assertIn(expected_text, str(raises.exception))
756+
709757
def _test_rolling_skew(self, obj):
710758
def test_impl(obj, window, min_periods):
711759
return obj.rolling(window, min_periods).skew()
@@ -929,6 +977,37 @@ def test_impl(df):
929977

930978
pd.testing.assert_frame_equal(hpat_func(df), test_impl(df))
931979

980+
@skip_sdc_jit('DataFrame.rolling.quantile() unsupported')
def test_df_rolling_quantile(self):
    # Builds a DataFrame whose columns mix integers, finite floats,
    # infinities, NaN and negative zero, then runs the shared rolling
    # quantile check on it.
    #
    # Negative infinity and negative zero are written as `-np.inf` and
    # `-0.0`: the `np.NINF` / `np.NZERO` aliases no longer exist in
    # NumPy 2.0, and these spellings produce the same float values.
    all_data = [
        list(range(10)),
        [1., -1., 0., 0.1, -0.1],
        [1., np.inf, np.inf, -1., 0., np.inf, -np.inf, -np.inf],
        [np.nan, np.inf, np.inf, np.nan, np.nan, np.nan, -np.inf, -0.0],
    ]
    # Columns have different lengths; cut all of them to the shortest one.
    length = min(len(d) for d in all_data)
    data = {n: d[:length] for n, d in zip(string.ascii_uppercase, all_data)}
    df = pd.DataFrame(data)

    self._test_rolling_quantile(df)
992+
993+
@skip_sdc_jit('DataFrame.rolling.quantile() unsupported exceptions')
def test_df_rolling_quantile_exception_unsupported_types(self):
    # Two float columns, labelled 'A' and 'B', fed to the shared
    # unsupported-types check.
    columns = ([1., -1., 0., 0.1, -0.1], [-1., 1., 0., -0.1, 0.1])
    n_rows = min(map(len, columns))
    frame = pd.DataFrame({
        label: values[:n_rows]
        for label, values in zip(string.ascii_uppercase, columns)
    })

    self._test_rolling_quantile_exception_unsupported_types(frame)
1001+
1002+
@skip_sdc_jit('DataFrame.rolling.quantile() unsupported exceptions')
def test_df_rolling_quantile_exception_unsupported_values(self):
    # Two float columns, labelled 'A' and 'B', fed to the shared
    # unsupported-values check.
    columns = ([1., -1., 0., 0.1, -0.1], [-1., 1., 0., -0.1, 0.1])
    n_rows = min(map(len, columns))
    frame = pd.DataFrame({
        label: values[:n_rows]
        for label, values in zip(string.ascii_uppercase, columns)
    })

    self._test_rolling_quantile_exception_unsupported_values(frame)
1010+
9321011
@skip_sdc_jit('DataFrame.rolling.skew() unsupported')
9331012
def test_df_rolling_skew(self):
9341013
all_data = test_global_input_data_float64
@@ -1152,63 +1231,25 @@ def test_series_rolling_min(self):
11521231

11531232
@skip_sdc_jit('Series.rolling.quantile() unsupported Series index')
11541233
def test_series_rolling_quantile(self):
1155-
def test_impl(series, window, min_periods, quantile):
1156-
return series.rolling(window, min_periods).quantile(quantile)
1157-
1158-
hpat_func = self.jit(test_impl)
1159-
11601234
all_data = [
11611235
list(range(10)), [1., -1., 0., 0.1, -0.1],
11621236
[1., np.inf, np.inf, -1., 0., np.inf, np.NINF, np.NINF],
11631237
[np.nan, np.inf, np.inf, np.nan, np.nan, np.nan, np.NINF, np.NZERO]
11641238
]
11651239
indices = [list(range(len(data)))[::-1] for data in all_data]
1166-
quantiles = [0, 0.25, 0.5, 0.75, 1]
11671240
for data, index in zip(all_data, indices):
11681241
series = pd.Series(data, index, name='A')
1169-
for window in range(0, len(series) + 3, 2):
1170-
for min_periods, q in product(range(0, window, 2), quantiles):
1171-
with self.subTest(series=series, window=window,
1172-
min_periods=min_periods, quantiles=q):
1173-
jit_result = hpat_func(series, window, min_periods, q)
1174-
ref_result = test_impl(series, window, min_periods, q)
1175-
pd.testing.assert_series_equal(jit_result, ref_result)
1242+
self._test_rolling_quantile(series)
11761243

11771244
@skip_sdc_jit('Series.rolling.quantile() unsupported exceptions')
11781245
def test_series_rolling_quantile_exception_unsupported_types(self):
1179-
def test_impl(quantile, interpolation):
1180-
series = pd.Series([1., -1., 0., 0.1, -0.1])
1181-
return series.rolling(3, 2).quantile(quantile, interpolation)
1182-
1183-
hpat_func = self.jit(test_impl)
1184-
1185-
msg_tmpl = 'Method rolling.quantile(). The object {}\n given: {}\n expected: {}'
1186-
1187-
with self.assertRaises(TypingError) as raises:
1188-
hpat_func('0.5', 'linear')
1189-
msg = msg_tmpl.format('quantile', 'unicode_type', 'float')
1190-
self.assertIn(msg, str(raises.exception))
1191-
1192-
with self.assertRaises(TypingError) as raises:
1193-
hpat_func(0.5, None)
1194-
msg = msg_tmpl.format('interpolation', 'none', 'str')
1195-
self.assertIn(msg, str(raises.exception))
1246+
series = pd.Series([1., -1., 0., 0.1, -0.1])
1247+
self._test_rolling_quantile_exception_unsupported_types(series)
11961248

11971249
@skip_sdc_jit('Series.rolling.quantile() unsupported exceptions')
11981250
def test_series_rolling_quantile_exception_unsupported_values(self):
1199-
def test_impl(quantile, interpolation):
1200-
series = pd.Series([1., -1., 0., 0.1, -0.1])
1201-
return series.rolling(3, 2).quantile(quantile, interpolation)
1202-
1203-
hpat_func = self.jit(test_impl)
1204-
1205-
with self.assertRaises(ValueError) as raises:
1206-
hpat_func(2, 'linear')
1207-
self.assertIn('quantile value not in [0, 1]', str(raises.exception))
1208-
1209-
with self.assertRaises(ValueError) as raises:
1210-
hpat_func(0.5, 'lower')
1211-
self.assertIn('interpolation value not "linear"', str(raises.exception))
1251+
series = pd.Series([1., -1., 0., 0.1, -0.1])
1252+
self._test_rolling_quantile_exception_unsupported_values(series)
12121253

12131254
@skip_sdc_jit('Series.rolling.skew() unsupported Series index')
12141255
def test_series_rolling_skew(self):

sdc/tests/tests_perf/test_perf_df_rolling.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ def setUpClass(cls):
9494
'mean': [2 * 10 ** 5],
9595
'median': [2 * 10 ** 5],
9696
'min': [2 * 10 ** 5],
97+
'quantile': [2 * 10 ** 5],
9798
'skew': [2 * 10 ** 5],
9899
'sum': [2 * 10 ** 5],
99100
}
@@ -196,6 +197,9 @@ def test_df_rolling_median(self):
196197
def test_df_rolling_min(self):
197198
self._test_df_rolling_method('min')
198199

200+
def test_df_rolling_quantile(self):
    # Performance case for the rolling 'quantile' method; the quantile
    # argument is handed over as source text.
    quantile_arg = '0.25'
    self._test_df_rolling_method('quantile', method_params=quantile_arg)
202+
199203
def test_df_rolling_skew(self):
200204
self._test_df_rolling_method('skew')
201205

0 commit comments

Comments
 (0)