Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.
Prev Previous commit
Next Next commit
Merge branch 'master' into dfat
  • Loading branch information
1e-to authored Apr 22, 2020
commit 261b5b0ad7800f244d5a6e596ccd0c217159d753
113 changes: 113 additions & 0 deletions sdc/datatypes/hpat_pandas_dataframe_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1906,6 +1906,58 @@ def _df_getitem_tuple_at_impl(self, idx):
return func_text, global_vars


def df_getitem_single_label_loc_codegen(self, idx):
    """
    Generate the source text of the ``DataFrame.loc[<single label>]`` implementation.

    Parameters
    ----------
    self
        DataFrame type being indexed. Only its ``index`` and ``columns``
        attributes are read by the generator.
    idx
        Type of the label given to ``loc``. The generator does not inspect it;
        the emitted function receives the label as its ``idx`` argument.

    Returns
    -------
    tuple
        ``(func_text, global_vars)``: the generated source as a string and the
        mapping of global names that source refers to.

    Example of generated implementation:
        def _df_getitem_single_label_loc_impl(self, idx):
          idx_list = find_idx(self._dataframe._index, idx)
          data_0 = _sdc_take(self._dataframe._data[0], idx_list)
          res_data_0 = pandas.Series(data_0)
          data_1 = _sdc_take(self._dataframe._data[1], idx_list)
          res_data_1 = pandas.Series(data_1)
          if len(idx_list) < 1:
            raise KeyError('Index is not in the DataFrame')
          new_index = _sdc_take(self._dataframe._index, idx_list)
          return pandas.DataFrame({"A": res_data_0, "B": res_data_1}, index=new_index)
    """
    if isinstance(self.index, types.NoneType):
        # No explicit index: the label itself is wrapped into the list handed
        # to _sdc_take and is also reused as the resulting index.
        # NOTE(review): this path never produces an empty idx_list, so the
        # KeyError guard below cannot fire for it; presumably an out-of-range
        # label must be rejected by _sdc_take -- confirm.
        idx_list_line = '  idx_list = numpy.array([idx])'
        new_index_line = '  new_index = numpy.array([idx])'
    else:
        idx_list_line = '  idx_list = find_idx(self._dataframe._index, idx)'
        new_index_line = '  new_index = _sdc_take(self._dataframe._index, idx_list)'

    func_lines = ['def _df_getitem_single_label_loc_impl(self, idx):',
                  idx_list_line]

    # One (column name, generated variable name) pair per column, in order.
    results = []
    for i, col in enumerate(self.columns):
        data = f'data_{i}'
        res_data = f'res_data_{i}'
        func_lines += [f'  {data} = _sdc_take(self._dataframe._data[{i}], idx_list)',
                       f'  {res_data} = pandas.Series({data})']
        results.append((col, res_data))

    # The `raise` must be nested one level deeper than its `if`, otherwise the
    # generated text is not valid Python.
    func_lines += ['  if len(idx_list) < 1:',
                   "    raise KeyError('Index is not in the DataFrame')"]

    frame_items = ', '.join(f'"{col}": {res_data}' for col, res_data in results)
    func_lines += [new_index_line,
                   f'  return pandas.DataFrame({{{frame_items}}}, index=new_index)']

    func_text = '\n'.join(func_lines)
    global_vars = {'pandas': pandas, 'numpy': numpy,
                   'numba': numba,
                   '_sdc_take': _sdc_take,
                   'find_idx': find_idx,
                   'KeyError': KeyError}

    return func_text, global_vars


def df_getitem_int_iloc_codegen(self, idx):
"""
Example of generated implementation:
Expand Down Expand Up @@ -2049,6 +2101,9 @@ def gen_df_getitem_tuple_at_impl(self, row, col):
return _reduce_impl


# Wires the single-label ``loc`` codegen to the name of the function its
# generated text defines ('_df_getitem_single_label_loc_impl').
# NOTE(review): gen_impl_generator is defined elsewhere; presumably it executes
# the generated text and returns the function with that name -- confirm.
gen_df_getitem_loc_single_label_impl = gen_impl_generator(
    df_getitem_single_label_loc_codegen, '_df_getitem_single_label_loc_impl')

# Same wiring for the integer ``iloc`` codegen.
gen_df_getitem_iloc_int_impl = gen_impl_generator(
    df_getitem_int_iloc_codegen, '_df_getitem_int_iloc_impl')

Expand Down Expand Up @@ -2084,6 +2139,13 @@ def sdc_pandas_dataframe_accessor_getitem(self, idx):

raise TypingError('Attribute at(). The index must be a row and literal column. Given: {}'.format(idx))

if accessor == 'loc':
if isinstance(idx, (types.Integer, types.UnicodeType, types.StringLiteral)):
return gen_df_getitem_loc_single_label_impl(self.dataframe, idx)

ty_checker = TypeChecker('Attribute loc().')
ty_checker.raise_exc(idx, 'int or str', 'idx')

if accessor == 'iat':
if isinstance(idx, types.Tuple) and isinstance(idx[1], types.Literal):
col = idx[1].literal_value
Expand Down Expand Up @@ -2287,6 +2349,57 @@ def sdc_pandas_dataframe_at_impl(self):
return sdc_pandas_dataframe_at_impl


@sdc_overload_attribute(DataFrameType, 'loc')
def sdc_pandas_dataframe_loc(self):
    """
    Intel Scalable Dataframe Compiler User Guide
    ********************************************

    Pandas API: pandas.DataFrame.loc

    Limitations
    -----------
    - ``loc`` always returns a DataFrame.
    - Parameter ``idx`` is supported only as a single value, e.g. :obj:`df.loc['A']`.

    Examples
    --------
    .. literalinclude:: ../../../examples/dataframe/dataframe_loc.py
       :language: python
       :lines: 36-
       :caption: Access a group of rows and columns by label(s) or a boolean array.
       :name: ex_dataframe_loc

    .. command-output:: python ./dataframe/dataframe_loc.py
       :cwd: ../../../examples

    .. seealso::
        :ref:`DataFrame.at <pandas.DataFrame.at>`
            Access a single value for a row/column label pair.
        :ref:`DataFrame.iloc <pandas.DataFrame.iloc>`
            Access group of rows and columns by integer position(s).
        :ref:`DataFrame.xs <pandas.DataFrame.xs>`
            Returns a cross-section (row(s) or column(s)) from the Series/DataFrame.
        :ref:`Series.loc <pandas.Series.loc>`
            Access group of values using labels.

    Intel Scalable Dataframe Compiler Developer Guide
    *************************************************
    Pandas DataFrame method :meth:`pandas.DataFrame.loc` implementation.

    .. only:: developer
        Test: python -m sdc.runtests -k sdc.tests.test_dataframe.TestDataFrame.test_df_loc*
    """

    # Reject anything that is not a DataFrame type before building the impl.
    TypeChecker('Attribute loc().').check(self, DataFrameType)

    def sdc_pandas_dataframe_loc_impl(self):
        # The accessor object is tagged with 'loc'; indexing it is handled elsewhere.
        return sdc.datatypes.hpat_pandas_dataframe_getitem_types.dataframe_getitem_accessor_init(self, 'loc')

    return sdc_pandas_dataframe_loc_impl


@sdc_overload_method(DataFrameType, 'pct_change')
def pct_change_overload(df, periods=1, fill_method='pad', limit=None, freq=None):
"""
Expand Down
34 changes: 34 additions & 0 deletions sdc/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1237,6 +1237,40 @@ def test_impl(df):
msg = 'Index is not in the Series'
self.assertIn(msg, str(raises.exception))

def test_df_loc(self):
    # loc with an integer label that occurs twice in the index; the jitted
    # result is compared against plain pandas as a DataFrame.
    def test_impl(df):
        return df.loc[4]

    sdc_func = sdc.jit(test_impl)

    labels = [3, 4, 1, 4, 0]
    columns = {"A": [3.2, 4.4, 7.0, 3.3, 1.0],
               "B": [3, 4, 1, 0, 222],
               "C": [3.1, 8.4, 7.1, 3.2, 1]}
    df = pd.DataFrame(columns, index=labels)

    # Same evaluation order as a direct call: jitted result first, then pandas.
    actual = sdc_func(df)
    expected = test_impl(df)
    pd.testing.assert_frame_equal(actual, expected)

@unittest.skip("SDC Dataframe.loc[] always return Dataframe")
def test_df_loc_str(self):
    # loc with a string label, compared against plain pandas as a DataFrame.
    def test_impl(df):
        return df.loc['c']

    sdc_func = sdc.jit(test_impl)
    # The label 'c' is duplicated on purpose, mirroring the duplicated integer
    # label in test_df_loc. The second occurrence used to be the Cyrillic
    # homoglyph U+0441, which left the Latin 'c' unique in the index.
    # NOTE(review): with a real duplicate pandas returns a DataFrame too, so
    # the skip above can probably be lifted -- confirm against SDC first.
    idx = ['a', 'b', 'c', 'c', 'e']
    df = pd.DataFrame({"A": ['3.2', '4.4', '7.0', '3.3', '1.0'],
                       "B": ['3', '4', '1', '0', '222'],
                       "C": ['3.1', '8.4', '7.1', '3.2', '1']}, index=idx)
    pd.testing.assert_frame_equal(sdc_func(df), test_impl(df))

@unittest.skip("SDC Dataframe.loc[] always return Dataframe")
def test_df_loc_no_idx(self):
    # loc on a frame built without an explicit index.
    def test_impl(df):
        return df.loc[2]

    sdc_func = sdc.jit(test_impl)

    columns = {"A": [3.2, 4.4, 7.0, 3.3, 1.0],
               "B": [3, 4, 1, 0, 222],
               "C": [3.1, 8.4, 7.1, 3.2, 1]}
    df = pd.DataFrame(columns)

    # Same evaluation order as a direct call: jitted result first, then pandas.
    actual = sdc_func(df)
    expected = test_impl(df)
    pd.testing.assert_frame_equal(actual, expected)

def test_df_head(self):
def get_func(n):
def impl(a):
Expand Down
You are viewing a condensed version of this merge commit. You can view the full changes here.