|
35 | 35 | import numpy |
36 | 36 | import sdc |
37 | 37 |
|
| 38 | + |
38 | 39 | from numba import types |
39 | 40 | from numba.special import literally |
40 | 41 | from sdc.hiframes.pd_dataframe_ext import DataFrameType |
41 | 42 | from sdc.hiframes.pd_series_type import SeriesType |
42 | 43 | from sdc.utilities.sdc_typing_utils import (TypeChecker, check_index_is_numeric, |
43 | 44 | check_types_comparable, |
44 | | - gen_df_impl_generator) |
| 45 | + gen_df_impl_generator, find_common_dtype_from_numpy_dtypes) |
45 | 46 | from sdc.str_arr_ext import StringArrayType |
46 | 47 |
|
47 | 48 | from sdc.hiframes.pd_dataframe_type import DataFrameType |
@@ -105,6 +106,132 @@ def hpat_pandas_df_index_impl(df): |
105 | 106 | return hpat_pandas_df_index_impl |
106 | 107 |
|
107 | 108 |
|
def sdc_pandas_dataframe_values_codegen(df, numba_common_dtype):
    """
    Build the source text of the ``DataFrame.values`` implementation.

    Input:
    column_len = 3
    numba_common_dtype = float64

    Func generated:
    def sdc_pandas_dataframe_values_impl(df):
        row_len = len(get_dataframe_data(df, 0))
        df_col_0 = get_dataframe_data(df, 0)
        df_col_1 = get_dataframe_data(df, 1)
        df_col_2 = get_dataframe_data(df, 2)
        df_values = numpy.empty(row_len*3, numpy.dtype("float64"))
        for i in range(row_len):
            df_values[i * 3 + 0] = df_col_0[i]
            df_values[i * 3 + 1] = df_col_1[i]
            df_values[i * 3 + 2] = df_col_2[i]
        return df_values.reshape(row_len, 3)

    Parameters
    ----------
    df
        Object exposing ``columns``; only the number of columns is used here.
    numba_common_dtype
        Common dtype of all columns. Its string form is embedded in the
        generated ``numpy.dtype("...")`` call.

    Returns
    -------
    tuple
        ``(func_def, global_vars)``: the generated source as a string and the
        globals dictionary it has to be executed with.

    Notes
    -----
    The generated code reads column 0 unconditionally to obtain the row count,
    so at least one column is assumed.
    """

    indent = 4 * ' '
    column_len = len(df.columns)

    # Local variable names are derived from the column position, not from the
    # column name: a name such as "a b", "x-1" or 1 is a valid DataFrame column
    # label but would not form a valid Python identifier in the generated source.
    column_vars = [f'df_col_{index}' for index in range(column_len)]

    func_text = ['row_len = len(get_dataframe_data(df, 0))']
    func_text.extend(
        f'{column_var} = get_dataframe_data(df, {index})'
        for index, column_var in enumerate(column_vars)
    )

    # The result is filled as a flat row-major buffer and reshaped at the end.
    func_text.append(f'df_values = numpy.empty(row_len*{column_len}, numpy.dtype("{numba_common_dtype}"))')
    func_text.append('for i in range(row_len):')
    func_text.extend(
        indent + f'df_values[i * {column_len} + {index}] = {column_var}[i]'
        for index, column_var in enumerate(column_vars)
    )
    func_text.append(f'return df_values.reshape(row_len, {column_len})\n')

    func_definition = ['def sdc_pandas_dataframe_values_impl(df):']
    func_definition.extend(indent + func_line for func_line in func_text)
    func_def = '\n'.join(func_definition)

    global_vars = {'pandas': pandas, 'numpy': numpy,
                   'get_dataframe_data': sdc.hiframes.pd_dataframe_ext.get_dataframe_data}

    return func_def, global_vars
| 156 | + |
| 157 | + |
@sdc_overload_attribute(DataFrameType, 'values')
def hpat_pandas_dataframe_values(df):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************
    Pandas API: pandas.DataFrame.values

    Limitations
    -----------
    Only numeric values supported as an output

    Examples
    --------
    .. literalinclude:: ../../../examples/dataframe/dataframe_values.py
       :language: python
       :lines: 27-
       :caption: The values data of the DataFrame.
       :name: ex_dataframe_values

    .. command-output:: python ./dataframe/dataframe_values.py
        :cwd: ../../../examples

    .. seealso::

        :ref:`DataFrame.to_numpy <pandas.DataFrame.to_numpy>`
            Recommended alternative to this method.
        :ref:`DataFrame.index <pandas.DataFrame.index>`
            Retrieve the index labels.
        :ref:`DataFrame.columns <pandas.DataFrame.columns>`
            Retrieving the column names.

    .. note::

        The dtype will be a lower-common-denominator dtype (implicit upcasting);
        that is to say if the dtypes (even of numeric types) are mixed, the one that accommodates all will be chosen.
        Use this with care if you are not dealing with the blocks.
        e.g. If the dtypes are float16 and float32, dtype will be upcast to float32. If dtypes are int32 and uint8,
        dtype will be upcast to int32. By numpy.find_common_type() convention,
        mixing int64 and uint64 will result in a float64 dtype.

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Pandas DataFrame attribute :attr:`pandas.DataFrame.values` implementation.

    .. only:: developer

        Test: python -m sdc.runtests -k sdc.tests.test_dataframe.TestDataFrame.test_df_values*

    Parameters
    -----------
    df: :obj:`pandas.DataFrame`
        input arg

    Returns
    -------
    :obj:`numpy.ndarray`
        return a Numpy representation of the DataFrame
    """

    checker = TypeChecker('Attribute values.')
    checker.check(df, DataFrameType)

    # TODO: Handle StringArrayType
    for position, column_type in enumerate(df.data):
        if isinstance(column_type, StringArrayType):
            checker.raise_exc(column_type, 'Numeric type', f'df.data["{df.columns[position]}"]')

    # One dtype able to hold every column's values; it becomes the dtype of the result array.
    column_dtypes = [column_type.dtype for column_type in df.data]
    numba_common_dtype = find_common_dtype_from_numpy_dtypes(column_dtypes, [])

    # The implementation is produced as source text specialised for this
    # DataFrame type and materialised with exec.
    func_def, global_vars = sdc_pandas_dataframe_values_codegen(df, numba_common_dtype)
    generated_namespace = {}
    exec(func_def, global_vars, generated_namespace)

    return generated_namespace['sdc_pandas_dataframe_values_impl']
| 233 | + |
| 234 | + |
108 | 235 | def sdc_pandas_dataframe_append_codegen(df, other, _func_name, args): |
109 | 236 | """ |
110 | 237 | Input: |
|
0 commit comments