@@ -113,56 +113,53 @@ def hpat_pandas_df_index_impl(df):
113113 return hpat_pandas_df_index_impl
114114
115115
116- def sdc_pandas_dataframe_values_codegen (df , numba_common_dtype ):
116+ def sdc_pandas_dataframe_values_codegen (self , numba_common_dtype ):
117117 """
118- Input:
119- column_len = 3
120- numba_common_dtype = float64
121-
122- Func generated:
123- def sdc_pandas_dataframe_values_impl(df):
124- row_len = len(df._data[0])
125- df_col_A = df._data[0]
126- df_col_B = df._data[1]
127- df_col_C = df._data[2]
128- df_values = numpy.empty(row_len*3, numpy.dtype("float64"))
129- for i in range(row_len):
130- df_values[i * 3 + 0] = df_col_A[i]
131- df_values[i * 3 + 1] = df_col_B[i]
132- df_values[i * 3 + 2] = df_col_C[i]
133- return df_values.reshape(row_len, 3)
134-
135- """
136-
137- indent = 4 * ' '
138- func_args = ['df' ]
139-
140- func_definition = [f'def sdc_pandas_dataframe_values_impl({ ", " .join (func_args )} ):' ]
141- func_text = []
142- column_list = []
143- column_len = len (df .columns )
144- func_text .append (f'row_len = len(df._data[0])' )
145-
146- for index , column_name in enumerate (df .columns ):
147- func_text .append (f'df_col_{ index } = df._data[{ index } ]' )
148- column_list .append (f'df_col_{ index } ' )
149-
150- func_text .append (f'df_values = numpy.empty(row_len*{ column_len } , numpy.dtype("{ numba_common_dtype } "))' )
151- func_text .append ('for i in range(row_len):' )
152- for j in range (column_len ):
153- func_text .append (indent + f'df_values[i * { column_len } + { j } ] = { column_list [j ]} [i]' )
154-
155- func_text .append (f"return df_values.reshape(row_len, { column_len } )\n " )
156- func_definition .extend ([indent + func_line for func_line in func_text ])
157- func_def = '\n ' .join (func_definition )
118+ Example of generated implementation:
119+ def sdc_pandas_dataframe_values_impl(self):
120+ length = len(self._data[0][0])
121+ col_data_0 = self._data[0][0]
122+ col_data_1 = self._data[1][0]
123+ col_data_2 = self._data[0][1]
124+ values = numpy.empty(length*3, numpy.dtype("float64"))
125+ for i in range(length):
126+ values[i*3+0] = col_data_0[i]
127+ values[i*3+1] = col_data_1[i]
128+ values[i*3+2] = col_data_2[i]
129+ return values.reshape(length, 3)
130+ """
131+ columns_data = []
132+ columns_num = len (self .columns )
133+ func_lines = [
134+ f'def sdc_pandas_dataframe_values_impl(self):' ,
135+ f' length = { df_length_expr (self )} ' ,
136+ ]
137+ for i , col in enumerate (self .columns ):
138+ col_loc = self .column_loc [col ]
139+ type_id , col_id = col_loc .type_id , col_loc .col_id
140+ func_lines += [
141+ f' col_data_{ i } = self._data[{ type_id } ][{ col_id } ]' ,
142+ ]
143+ columns_data .append (f'col_data_{ i } ' )
158144
145+ func_lines += [
146+ f' values = numpy.empty(length*{ columns_num } , numpy.dtype("{ numba_common_dtype } "))' ,
147+ f' for i in range(length):' ,
148+ ]
149+ func_lines += ['\n ' .join ([
150+ f' values[i*{ columns_num } +{ j } ] = { columns_data [j ]} [i]' ,
151+ ]) for j in range (columns_num )]
152+ func_lines += [
153+ f' return values.reshape(length, { columns_num } )\n '
154+ ]
155+ func_text = '\n ' .join (func_lines )
159156 global_vars = {'pandas' : pandas , 'numpy' : numpy }
160157
161- return func_def , global_vars
158+ return func_text , global_vars
162159
163160
164161@sdc_overload_attribute (DataFrameType , 'values' )
165- def hpat_pandas_dataframe_values (df ):
162+ def hpat_pandas_dataframe_values (self ):
166163 """
167164 Intel Scalable Dataframe Compiler User Guide
168165 ********************************************
@@ -208,24 +205,24 @@ def hpat_pandas_dataframe_values(df):
208205
209206 func_name = 'Attribute values.'
210207 ty_checker = TypeChecker (func_name )
211- ty_checker .check (df , DataFrameType )
208+ ty_checker .check (self , DataFrameType )
212209
213210 # TODO: Handle StringArrayType
214- for i , column in enumerate (df .data ):
211+ for i , column in enumerate (self .data ):
215212 if isinstance (column , StringArrayType ):
216- ty_checker .raise_exc (column , 'Numeric type' , f'df.data["{ df .columns [i ]} "]' )
213+ ty_checker .raise_exc (column , 'Numeric type' , f'df.data["{ self .columns [i ]} "]' )
217214
218- numba_common_dtype = find_common_dtype_from_numpy_dtypes ([column .dtype for column in df .data ], [])
215+ numba_common_dtype = find_common_dtype_from_numpy_dtypes ([column .dtype for column in self .data ], [])
219216
220- def hpat_pandas_df_values_impl (df , numba_common_dtype ):
217+ def hpat_pandas_df_values_impl (self , numba_common_dtype ):
221218 loc_vars = {}
222- func_def , global_vars = sdc_pandas_dataframe_values_codegen (df , numba_common_dtype )
219+ func_text , global_vars = sdc_pandas_dataframe_values_codegen (self , numba_common_dtype )
223220
224- exec (func_def , global_vars , loc_vars )
221+ exec (func_text , global_vars , loc_vars )
225222 _values_impl = loc_vars ['sdc_pandas_dataframe_values_impl' ]
226223 return _values_impl
227224
228- return hpat_pandas_df_values_impl (df , numba_common_dtype )
225+ return hpat_pandas_df_values_impl (self , numba_common_dtype )
229226
230227
231228def sdc_pandas_dataframe_append_codegen (df , other , _func_name , ignore_index_value , indexes_comparable , args ):
0 commit comments