2828import time
2929
3030import numba
31- import numpy as np
31+ import numpy
3232import pandas
3333
3434from sdc .tests .test_utils import test_global_input_data_float64
3535from sdc .tests .tests_perf .test_perf_base import TestBase
3636from sdc .tests .tests_perf .test_perf_utils import (calc_compilation , get_times ,
3737 perf_data_gen_fixed_len )
38- from .generator import generate_test_cases
39- from .generator import TestCase as TC
38+
39+
# Source template of a timed ``DataFrame.rolling(...).<method>(...)`` use case.
# Placeholders are filled with ``str.format``:
#   method_name          - rolling method to call; also part of the function name
#   extra_usecase_params - extra parameters of the generated function (may be empty)
#   rolling_params       - argument text passed to ``rolling()``
#   method_params        - argument text passed to the rolling method
# The generated function returns ``(elapsed_seconds, result)`` and expects
# ``time`` to be available in the namespace it is executed in.
# The body lines must stay indented relative to ``def``; otherwise the
# rendered source does not compile.
rolling_usecase_tmpl = """
def df_rolling_{method_name}_usecase(data, {extra_usecase_params}):
    start_time = time.time()
    res = data.rolling({rolling_params}).{method_name}({method_params})
    end_time = time.time()
    return end_time - start_time, res
"""
4047
4148
4249def get_rolling_params (window = 100 , min_periods = None ):
@@ -48,6 +55,27 @@ def get_rolling_params(window=100, min_periods=None):
4855 return ', ' .join (rolling_params )
4956
5057
def gen_df_rolling_usecase(method_name, rolling_params=None,
                           extra_usecase_params='', method_params=''):
    """Generate a timed use case function for a DataFrame.rolling method.

    The function source is rendered from ``rolling_usecase_tmpl`` and
    executed with ``np`` (numpy) and ``time`` available as globals.

    :param method_name: name of the rolling method, e.g. ``'min'``
    :param rolling_params: argument text for ``rolling()``; when empty or
        None the result of ``get_rolling_params()`` is used
    :param extra_usecase_params: extra parameter names of the generated
        function, as they should appear in its signature
    :param method_params: argument text passed to the rolling method
    :return: the generated ``df_rolling_<method_name>_usecase`` function
    """
    if not rolling_params:
        rolling_params = get_rolling_params()

    # Must match the function name defined by the template exactly,
    # otherwise the lookup after exec() fails with KeyError.
    usecase_name = f'df_rolling_{method_name}_usecase'

    func_text = rolling_usecase_tmpl.format(
        method_name=method_name,
        extra_usecase_params=extra_usecase_params,
        rolling_params=rolling_params,
        method_params=method_params,
    )

    # NOTE: exec() runs whatever text the arguments render to; only pass
    # trusted, test-authored strings here.
    global_vars = {'np': numpy, 'time': time}
    loc_vars = {}
    exec(func_text, global_vars, loc_vars)

    return loc_vars[usecase_name]
77+
78+
5179# python -m sdc.runtests sdc.tests.tests_perf.test_perf_df_rolling.TestDFRollingMethods
5280class TestDFRollingMethods (TestBase ):
5381 # more than 19 columns raise SystemError: CPUDispatcher() returned a result with an error set
@@ -56,6 +84,19 @@ class TestDFRollingMethods(TestBase):
@classmethod
def setUpClass(cls):
    """Run the base class setup and register the data sizes per method.

    ``cls.total_data_length`` maps a rolling method name to a list of
    total data lengths.
    """
    super().setUpClass()

    # Every size is a whole multiple of this base length.
    base_length = 10 ** 5
    multipliers = (
        ('apply', 2),
        ('corr', 1),
        ('count', 8),
        ('cov', 1),
        ('kurt', 4),
        ('max', 2),
        ('mean', 2),
        ('median', 2),
        ('min', 2),
        ('skew', 2),
        ('sum', 2),
    )
    cls.total_data_length = {
        method: [factor * base_length] for method, factor in multipliers
    }
59100
60101 def _test_jitted (self , pyfunc , record , * args , ** kwargs ):
61102 # compilation time
@@ -67,36 +108,47 @@ def _test_jitted(self, pyfunc, record, *args, **kwargs):
67108 cfunc (* args , ** kwargs )
68109
69110 # execution and boxing time
70- record ['test_results' ], record ['boxing_results' ] = get_times (cfunc , * args , ** kwargs )
111+ record ['test_results' ], record ['boxing_results' ] = get_times (cfunc ,
112+ * args ,
113+ ** kwargs )
71114
def _test_python(self, pyfunc, record, *args, **kwargs):
    """Time ``pyfunc`` via ``get_times`` and store the result in ``record``.

    Only the first value returned by ``get_times`` is kept, under the
    ``'test_results'`` key; the second one is discarded.
    """
    exec_results, _unused = get_times(pyfunc, *args, **kwargs)
    record['test_results'] = exec_results
74117
75- def _test_case (self , pyfunc , name , total_data_length , data_num = 1 ,
76- input_data = test_global_input_data_float64 , columns_num = 10 ):
77-
118+ def _gen_df (self , data , columns_num = 10 ):
119+ """Generate DataFrame based on input data"""
120+ return pandas .DataFrame (
121+ {col : data for col in string .ascii_uppercase [:columns_num ]})
122+
123+ def _test_case (self , pyfunc , name ,
124+ input_data = test_global_input_data_float64 ,
125+ columns_num = 10 , extra_data_num = 0 ):
126+ """
127+ Test DataFrame.rolling method
128+ :param pyfunc: Python function to test which calls tested method inside
129+ :param name: name of the tested method, e.g. min
130+ :param input_data: initial data used for generating test data
131+ :param columns_num: number of columns in generated DataFrame
132+ :param extra_data_num: number of additionally generated DataFrames
133+ """
78134 if columns_num > self .max_columns_num :
79135 columns_num = self .max_columns_num
80136
81- if input_data is None :
82- input_data = test_global_input_data_float64
83-
84- test_name = 'DataFrame.rolling.{}' .format (name )
85-
86137 full_input_data_length = sum (len (i ) for i in input_data )
87- for data_length in total_data_length :
138+ for data_length in self . total_data_length [ name ] :
88139 base = {
89- 'test_name' : test_name ,
140+ 'test_name' : f'DF.rolling. { name } ' ,
90141 'data_size' : data_length ,
91142 }
92- data = perf_data_gen_fixed_len (input_data , full_input_data_length , data_length )
93- test_data = pandas .DataFrame ({col : data for col in string .ascii_uppercase [:columns_num ]})
143+ data = perf_data_gen_fixed_len (input_data , full_input_data_length ,
144+ data_length )
145+ test_data = self ._gen_df (data , columns_num = columns_num )
94146
95147 args = [test_data ]
96- for i in range (data_num - 1 ):
97- np .random .seed (i )
98- extra_data = np .random .ranf (data_length )
99- args .append (pandas . DataFrame ({ col : extra_data for col in string . ascii_uppercase [: columns_num ]} ))
148+ for i in range (extra_data_num ):
149+ numpy .random .seed (i )
150+ extra_data = numpy .random .ranf (data_length )
151+ args .append (self . _gen_df ( extra_data , columns_num = columns_num ))
100152
101153 record = base .copy ()
102154 record ['test_type' ] = 'SDC'
@@ -108,17 +160,44 @@ def _test_case(self, pyfunc, name, total_data_length, data_num=1,
108160 self ._test_python (pyfunc , record , * args )
109161 self .test_results .add (** record )
110162
def _test_df_rolling_method(self, name, rolling_params=None,
                            extra_usecase_params='', method_params=''):
    """Generate the use case for a rolling method and run its test case.

    :param name: name of the tested rolling method, e.g. ``'min'``
    :param rolling_params: argument text for ``rolling()``, forwarded to
        ``gen_df_rolling_usecase``
    :param extra_usecase_params: comma-separated extra parameter names of
        the use case; one extra DataFrame is requested per name
    :param method_params: argument text passed to the rolling method
    """
    usecase = gen_df_rolling_usecase(name, rolling_params=rolling_params,
                                     extra_usecase_params=extra_usecase_params,
                                     method_params=method_params)

    # Count names rather than ', ' separators so that 'a,b' and a trailing
    # comma are handled the same way the generated signature parses them.
    extra_names = [param for param in extra_usecase_params.split(',')
                   if param.strip()]
    self._test_case(usecase, name, extra_data_num=len(extra_names))
172+
def test_df_rolling_apply_mean(self):
    """Run the DataFrame.rolling ``apply`` case with a mean-computing lambda."""
    # Source text of the function handed to apply(); ``np`` is resolved
    # inside the generated use case.
    apply_func_src = 'lambda x: np.nan if len(x) == 0 else x.mean()'
    self._test_df_rolling_method('apply', method_params=apply_func_src)
176+
def test_df_rolling_corr(self):
    """Run the DataFrame.rolling ``corr`` case against a second ``other`` argument."""
    usecase_options = {
        'extra_usecase_params': 'other',
        'method_params': 'other=other',
    }
    self._test_df_rolling_method('corr', **usecase_options)
180+
def test_df_rolling_count(self):
    """Run the DataFrame.rolling ``count`` case with default parameters."""
    method = 'count'
    self._test_df_rolling_method(method)
183+
def test_df_rolling_kurt(self):
    """Run the DataFrame.rolling ``kurt`` case with default parameters."""
    method = 'kurt'
    self._test_df_rolling_method(method)
186+
def test_df_rolling_max(self):
    """Run the DataFrame.rolling ``max`` case with default parameters."""
    method = 'max'
    self._test_df_rolling_method(method)
189+
def test_df_rolling_mean(self):
    """Run the DataFrame.rolling ``mean`` case with default parameters."""
    method = 'mean'
    self._test_df_rolling_method(method)
192+
def test_df_rolling_median(self):
    """Run the DataFrame.rolling ``median`` case with default parameters."""
    method = 'median'
    self._test_df_rolling_method(method)
195+
def test_df_rolling_min(self):
    """Run the DataFrame.rolling ``min`` case with default parameters."""
    method = 'min'
    self._test_df_rolling_method(method)
111198
112- cases = [
113- TC (name = 'apply' , params = 'lambda x: np.nan if len(x) == 0 else x.mean()' , size = [2 * 10 ** 5 ]),
114- TC (name = 'corr' , size = [10 ** 5 ], params = 'other' , data_num = 2 ),
115- TC (name = 'count' , size = [8 * 10 ** 5 ]),
116- TC (name = 'kurt' , size = [4 * 10 ** 5 ]),
117- TC (name = 'max' , size = [2 * 10 ** 5 ]),
118- TC (name = 'mean' , size = [2 * 10 ** 5 ]),
119- TC (name = 'median' , size = [2 * 10 ** 5 ]),
120- TC (name = 'min' , size = [2 * 10 ** 5 ]),
121- TC (name = 'skew' , size = [2 * 10 ** 5 ])
122- ]
def test_df_rolling_skew(self):
    """Run the DataFrame.rolling ``skew`` case with default parameters."""
    method = 'skew'
    self._test_df_rolling_method(method)
123201
124- generate_test_cases (cases , TestDFRollingMethods , 'df' , 'rolling({})' .format (get_rolling_params ()))
def test_df_rolling_sum(self):
    """Run the DataFrame.rolling ``sum`` case with default parameters."""
    method = 'sum'
    self._test_df_rolling_method(method)
0 commit comments