Df.at impl (#738)

IntelPython · akharche · May 15, 2020 · Apr 23, 2020 · Apr 23, 2020 · Apr 27, 2020
commit b74104022cf57fa592148bd90e259d74fff41851
diff --git a/examples/dataframe/dataframe_at.py b/examples/dataframe/dataframe_at.py
@@ -0,0 +1,39 @@
+# *****************************************************************************
+# Copyright (c) 2020, Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#     Redistributions of source code must retain the above copyright notice,
+#     this list of conditions and the following disclaimer.
+#
+#     Redistributions in binary form must reproduce the above copyright notice,
+#     this list of conditions and the following disclaimer in the documentation
+#     and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+
+import pandas as pd
+from numba import njit
+
+
+@njit
+def dataframe_at():
+    # No explicit index is passed, so rows carry the default integer labels 0..3.
+    df = pd.DataFrame({'A': [1.0, 2.0, 3.0, 1.0], 'B': [4, 5, 6, 7], 'C': ['a', 'b', 'c', 'd']})
+
+    # Look up the row labelled 1 in column 'C'; the column name has to be a literal.
+    return df.at[1, 'C']  # ['b']
+
+
+print(dataframe_at())
diff --git a/sdc/datatypes/hpat_pandas_dataframe_functions.py b/sdc/datatypes/hpat_pandas_dataframe_functions.py
@@ -37,7 +37,7 @@
 
 from pandas.core.indexing import IndexingError
 
-from numba import types
+from numba import types, prange
 from numba.special import literally
 from numba.typed import List, Dict
 from numba.errors import TypingError
@@ -1877,6 +1877,35 @@ def _df_getitem_unicode_idx_impl(self, idx):
     ty_checker.raise_exc(idx, expected_types, 'idx')
 
 
+def df_getitem_tuple_at_codegen(self, row, col):
+    """
+    Generate the source text of the ``df.at[row, col]`` getitem implementation.
+
+    The column is resolved while generating the code: ``col`` is matched against
+    ``self.columns`` and the position of the first match is baked into the
+    generated function, which wraps that column's data in a Series (sharing the
+    DataFrame index) and defers the row lookup to ``Series.at``.
+
+    Parameters
+    ----------
+    self
+        DataFrame type; only its ``columns`` attribute is read.
+    row
+        Row label type. Not used here; kept so the signature matches the caller.
+    col
+        Column name to look up.
+
+    Returns
+    -------
+    tuple
+        ``(func_text, global_vars)``: the source of ``_df_getitem_tuple_at_impl``
+        and the globals it must be executed with.
+
+    Raises
+    ------
+    KeyError
+        If ``col`` is not one of ``self.columns``.
+
+    Example of generated implementation:
+        def _df_getitem_tuple_at_impl(self, idx):
+            row, _ = idx
+            data = self._dataframe._data[1]
+            res_data = pandas.Series(data, index=self._dataframe.index)
+            return res_data.at[row]
+    """
+    # Only the first match is used: a second copy of the body would sit after
+    # the first ``return`` and could never run.
+    col_pos = next((pos for pos, name in enumerate(self.columns) if name == col), None)
+    if col_pos is None:
+        raise KeyError('Column is not in the DataFrame')
+
+    func_lines = [
+        'def _df_getitem_tuple_at_impl(self, idx):',
+        '  row, _ = idx',
+        f'  data = self._dataframe._data[{col_pos}]',
+        '  res_data = pandas.Series(data, index=self._dataframe.index)',
+        '  return res_data.at[row]',
+    ]
+    func_text = '\n'.join(func_lines)
+    global_vars = {'pandas': pandas}
+
+    return func_text, global_vars
+
+
 def df_getitem_single_label_loc_codegen(self, idx):
     """
     Example of generated implementation:
@@ -2063,6 +2092,15 @@ def _df_getitem_list_bool_iloc_impl(self, idx):
     return func_text, global_vars
 
 
+def gen_df_getitem_tuple_at_impl(self, row, col):
+    """Build the ``df.at[row, col]`` implementation by executing its generated source."""
+    source, namespace = df_getitem_tuple_at_codegen(self, row, col)
+    defined = {}
+    exec(source, namespace, defined)
+
+    return defined['_df_getitem_tuple_at_impl']
+
+
 gen_df_getitem_loc_single_label_impl = gen_impl_generator(
     df_getitem_single_label_loc_codegen, '_df_getitem_single_label_loc_impl')
 
@@ -2086,6 +2124,21 @@ def sdc_pandas_dataframe_accessor_getitem(self, idx):
 
     accessor = self.accessor.literal_value
 
+    if accessor == 'at':
+        num_idx = isinstance(idx[0], types.Number) and isinstance(self.dataframe.index, (types.Array, types.NoneType))
+        str_idx = (isinstance(idx[0], (types.UnicodeType, types.StringLiteral))
+                   and isinstance(self.dataframe.index, StringArrayType))
+        if isinstance(idx, types.Tuple) and isinstance(idx[1], types.StringLiteral):
+            if num_idx or str_idx:
+                row = idx[0]
+                col = idx[1].literal_value
+                return gen_df_getitem_tuple_at_impl(self.dataframe, row, col)
+
+            raise TypingError('Attribute at(). The row parameter type ({}) is different from the index type\
+                              ({})'.format(type(idx[0]), type(self.dataframe.index)))
+
+        raise TypingError('Attribute at(). The index must be a row and literal column. Given: {}'.format(idx))
+
     if accessor == 'loc':
         if isinstance(idx, (types.Integer, types.UnicodeType, types.StringLiteral)):
             return gen_df_getitem_loc_single_label_impl(self.dataframe, idx)
@@ -2244,6 +2297,58 @@ def sdc_pandas_dataframe_iat_impl(self):
     return sdc_pandas_dataframe_iat_impl
 
 
+@sdc_overload_attribute(DataFrameType, 'at')
+def sdc_pandas_dataframe_at(self):
+    """
+    Intel Scalable Dataframe Compiler User Guide
+    ********************************************
+
+    Limitations
+    -----------
+    - ``DataFrame.at`` always returns ``array``.
+    - Parameter ``column`` in ``idx`` must be a literal value.
+    - The row label must be a number when the index is an array or the default index,
+      or a string when the index is a string array.
+
+    Pandas API: pandas.DataFrame.at
+
+    Examples
+    --------
+    .. literalinclude:: ../../../examples/dataframe/dataframe_at.py
+       :language: python
+       :lines: 28-
+       :caption: Access a single value for a row/column label pair.
+       :name: ex_dataframe_at
+
+    .. command-output:: python ./dataframe/dataframe_at.py
+       :cwd: ../../../examples
+
+    .. seealso::
+
+        :ref:`DataFrame.iat <pandas.DataFrame.iat>`
+            Access a single value for a row/column pair by integer position.
+
+        :ref:`DataFrame.loc <pandas.DataFrame.loc>`
+            Access a group of rows and columns by label(s).
+
+        :ref:`Series.at <pandas.Series.at>`
+            Access a single value using a label.
+
+    Intel Scalable Dataframe Compiler Developer Guide
+    *************************************************
+    Pandas DataFrame method :meth:`pandas.DataFrame.at` implementation.
+
+    .. only:: developer
+        Test: python -m sdc.runtests -k sdc.tests.test_dataframe.TestDataFrame.test_df_at*
+    """
+
+    # Validate at typing time that the attribute is requested on a DataFrame type.
+    ty_checker = TypeChecker('Attribute at().')
+    ty_checker.check(self, DataFrameType)
+
+    # The attribute only builds an accessor tagged 'at'; the ``[row, col]`` lookup
+    # itself is handled by the accessor getitem overload (its ``accessor == 'at'`` branch).
+    def sdc_pandas_dataframe_at_impl(self):
+        return dataframe_getitem_accessor_init(self, 'at')
+
+    return sdc_pandas_dataframe_at_impl
+
+
 @sdc_overload_attribute(DataFrameType, 'loc')
 def sdc_pandas_dataframe_loc(self):
     """

diff --git a/sdc/datatypes/hpat_pandas_series_functions.py b/sdc/datatypes/hpat_pandas_series_functions.py
@@ -245,9 +245,14 @@ def hpat_pandas_series_loc_impl(self, idx):
         if isinstance(idx, (int, types.Integer, types.UnicodeType, types.StringLiteral)):
             def hpat_pandas_series_at_impl(self, idx):
+                # Select every element whose index label equals ``idx``;
+                # raise ValueError when no label matches.
                 index = self._series.index
+                # Becomes True as soon as one label matches.
+                check = False
                 mask = numpy.empty(len(self._series._data), numpy.bool_)
                 for i in numba.prange(len(index)):
                     mask[i] = index[i] == idx
+                    # NOTE(review): ``check`` is plainly assigned inside a prange body rather than
+                    # updated as a reduction -- confirm the flag survives if this loop is parallelised.
+                    if mask[i] == True:  # noqa
+                        check = True
+                if check != True:  # noqa
+                    raise ValueError("Index is not in the Series")
+                # Boolean-mask selection: the result is an array of all matches, not a scalar.
                 return self._series._data[mask]
 
             return hpat_pandas_series_at_impl

diff --git a/sdc/tests/test_dataframe.py b/sdc/tests/test_dataframe.py
@@ -1197,6 +1197,46 @@ def test_impl(df):
         msg = 'Index is out of bounds for axis'
         self.assertIn(msg, str(raises.exception))
 
+    def test_df_at(self):
+        # Integer row labels; label 0 occurs twice in the index, label 2 once.
+        def test_impl(df, n):
+            return df.at[n, 'C']
+
+        jitted = sdc.jit(test_impl)
+        frame = pd.DataFrame(
+            {
+                "A": [3.2, 4.4, 7.0, 3.3, 1.0],
+                "B": [3, 4, 1, 0, 222],
+                "C": ['a', 'dd', 'c', '12', 'ddf'],
+            },
+            index=[3, 0, 1, 2, 0],
+        )
+        for label in (0, 2):
+            np.testing.assert_array_equal(jitted(frame, label), test_impl(frame, label))
+
+    def test_df_at_type(self):
+        # String row labels looked up against a string index.
+        def test_impl(df, n):
+            return df.at[n, "B"]
+
+        sdc_func = sdc.jit(test_impl)
+        idx = ['3', '4', '1', '2', '0']
+        n_cases = ['2', '3']
+        df = pd.DataFrame({"A": [3.2, 4.4, 7.0, 3.3, 1.0],
+                           "B": [3, 4, 1, 0, 222],
+                           "C": ['a', 'dd', 'c', '12', 'ddf']}, index=idx)
+        for n in n_cases:
+            # The compiled lookup returns an array while pandas returns a scalar, so compare
+            # element-wise (as test_df_at does) instead of relying on the truthiness of a
+            # one-element array inside assertEqual.
+            np.testing.assert_array_equal(sdc_func(df, n), test_impl(df, n))
+
+    def test_df_at_value_error(self):
+        # Row label 5 is not present in the index, so the compiled lookup must raise.
+        def test_impl(df):
+            return df.at[5, 'C']
+
+        jitted = sdc.jit(test_impl)
+        frame = pd.DataFrame(
+            {
+                "A": [3.2, 4.4, 7.0, 3.3, 1.0],
+                "B": [3, 4, 1, 0, 222],
+                "C": [3, 4, 2, 6, 1],
+            },
+            index=[3, 4, 1, 2, 0],
+        )
+
+        with self.assertRaises(ValueError) as caught:
+            jitted(frame)
+        self.assertIn('Index is not in the Series', str(caught.exception))
+
     def test_df_loc(self):
         def test_impl(df):
             return df.loc[4]