Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit 46c1212

Browse files
akharche and densmirn authored
Re-implement df structure: refactor len (#868)
* Re-implement df structure: refactor len
* Undecorated all the remaining methods

Co-authored-by: Denis <denis.smirnov@intel.com>
1 parent 9b7c990 commit 46c1212

File tree

5 files changed

+18
-25
lines changed

5 files changed

+18
-25
lines changed

sdc/hiframes/pd_dataframe_ext.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,7 @@ def df_len_overload(df):
160160

161161
if len(df.columns) == 0: # empty df
162162
return lambda df: 0
163-
return lambda df: len(df._data[0])
163+
return lambda df: len(df._data[0][0])
164164

165165

166166
# handle getitem for Tuples because sometimes df._data[i] in

sdc/tests/test_dataframe.py

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,7 @@
5353
skip_numba_jit,
5454
skip_sdc_jit,
5555
test_global_input_data_float64,
56-
test_global_input_data_unicode_kind4,
57-
dfRefactoringNotImplemented)
56+
test_global_input_data_unicode_kind4)
5857

5958

6059
@sdc.jit
@@ -156,7 +155,7 @@ def test_impl(A, B, c):
156155
c = 2
157156
pd.testing.assert_frame_equal(hpat_func(A, B, c), test_impl(A, B, c))
158157

159-
@dfRefactoringNotImplemented
158+
@unittest.skip('Implement feature to create DataFrame without column names')
160159
def test_create_without_column_names(self):
161160
def test_impl():
162161
df = pd.DataFrame([100, 200, 300, 400, 200, 100])
@@ -1405,7 +1404,7 @@ def test_impl():
14051404
sdc_func = sdc.jit(test_impl)
14061405
pd.testing.assert_frame_equal(sdc_func(), test_impl())
14071406

1408-
@dfRefactoringNotImplemented
1407+
@unittest.skip("SDC Dataframe.loc[] always return Dataframe")
14091408
def test_df_loc_str(self):
14101409
def test_impl(df):
14111410
return df.loc['c']
@@ -1417,7 +1416,7 @@ def test_impl(df):
14171416
"C": ['3.1', '8.4', '7.1', '3.2', '1']}, index=idx)
14181417
pd.testing.assert_frame_equal(sdc_func(df), test_impl(df))
14191418

1420-
@dfRefactoringNotImplemented
1419+
@unittest.skip("SDC Dataframe.loc[] always return Dataframe")
14211420
def test_df_loc_no_idx(self):
14221421
def test_impl(df):
14231422
return df.loc[2]
@@ -2658,7 +2657,6 @@ def test_impl():
26582657
self.assertTrue(isinstance(two, np.ndarray))
26592658
self.assertTrue(isinstance(three, np.ndarray))
26602659

2661-
@dfRefactoringNotImplemented
26622660
def test_df_len(self):
26632661
def test_impl(df):
26642662
return len(df)
@@ -2709,27 +2707,27 @@ def test_impl():
27092707
hpat_func = self.jit(test_impl)
27102708
pd.testing.assert_series_equal(hpat_func(), test_impl())
27112709

2712-
@dfRefactoringNotImplemented
27132710
def test_df_iterate_over_columns2(self):
27142711
""" Verifies iteration over unboxed df columns using literal unroll. """
27152712
from sdc.hiframes.api import get_nan_mask
27162713

27172714
@self.jit
2718-
def jitted_func(df):
2715+
def jitted_func():
2716+
cols = ('A', 'B', 'C', 'D')
2717+
df = pd.DataFrame({
2718+
'A': ['a', 'b', None, 'a', '', None, 'b'],
2719+
'B': ['a', 'b', 'd', 'a', '', 'c', 'b'],
2720+
'C': [np.nan, 1, 2, 1, np.nan, 2, 1],
2721+
'D': [1, 2, 9, 5, 2, 1, 0]
2722+
})
27192723
res_nan_mask = np.zeros(len(df), dtype=np.bool_)
2720-
for col in literal_unroll(df._data):
2721-
res_nan_mask += get_nan_mask(col)
2724+
for col in literal_unroll(cols):
2725+
res_nan_mask += get_nan_mask(df[col].values)
27222726
return res_nan_mask
27232727

2724-
df = pd.DataFrame({
2725-
'A': ['a', 'b', None, 'a', '', None, 'b'],
2726-
'B': ['a', 'b', 'd', 'a', '', 'c', 'b'],
2727-
'C': [np.nan, 1, 2, 1, np.nan, 2, 1],
2728-
'D': [1, 2, 9, 5, 2, 1, 0]
2729-
})
27302728
# expected is a boolean mask of df rows that have None values
27312729
expected = np.asarray([True, False, True, False, True, True, False])
2732-
result = jitted_func(df)
2730+
result = jitted_func()
27332731
np.testing.assert_array_equal(result, expected)
27342732

27352733

sdc/tests/test_groupby.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,7 @@
4242
get_start_end,
4343
skip_numba_jit,
4444
skip_sdc_jit,
45-
sdc_limitation,
46-
dfRefactoringNotImplemented)
45+
sdc_limitation)
4746
from sdc.tests.test_series import gen_frand_array
4847

4948

sdc/tests/test_rolling.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,7 @@
4141
from sdc.tests.test_series import gen_frand_array
4242
from sdc.tests.test_utils import (count_array_REPs, count_parfor_REPs,
4343
skip_numba_jit, skip_sdc_jit,
44-
test_global_input_data_float64,
45-
dfRefactoringNotImplemented)
44+
test_global_input_data_float64)
4645

4746

4847
LONG_TEST = (int(os.environ['SDC_LONG_ROLLING_TEST']) != 0

sdc/tests/test_utils.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -212,9 +212,6 @@ def skip_inline(msg_or_func):
212212
return wrapper(func) if func else wrapper
213213

214214

215-
dfRefactoringNotImplemented = unittest.expectedFailure
216-
217-
218215
def take_k_elements(k, data, repeat=False, seed=None):
219216
if seed is not None:
220217
np.random.seed(seed)

0 commit comments

Comments
 (0)