Skip to content
This repository was archived by the owner on Feb 2, 2024. It is now read-only.

Commit 1f00705

Browse files
authored
Re-implement df.copy based on new structure (#854)
1 parent 02d56d0 commit 1f00705

File tree

2 files changed

+33
-8
lines changed

2 files changed

+33
-8
lines changed

sdc/datatypes/hpat_pandas_dataframe_functions.py

Lines changed: 12 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -548,22 +548,27 @@ def head_overload(df, n=5):
548548

549549
def _dataframe_codegen_copy(func_params, series_params, df):
550550
"""
551-
Example func_text for func_name='copy' columns=('A', 'B', 'C'):
551+
Example func_text for func_name='copy' columns=('A', 'B'):
552552
def _df_copy_impl(df, deep=True):
553-
series_0 = pandas.Series(df._data[0])
554-
result_0 = series_0.copy(deep=deep)
555-
series_1 = pandas.Series(df._data[1])
556-
result_1 = series_1.copy(deep=deep)
557-
return pandas.DataFrame({"A": result_0, "B": result_1}, index=df._index)
553+
data_0 = df._data[0][0]
554+
series_0 = pandas.Series(data_0, name='A')
555+
result_0 = series_0.copy(deep=deep)
556+
data_1 = df._data[1][0]
557+
series_1 = pandas.Series(data_1, name='B')
558+
result_1 = series_1.copy(deep=deep)
559+
return pandas.DataFrame({"A": result_0, "B": result_1}, index=df._index)
558560
"""
559561
results = []
560562
series_params_str = ', '.join(kwsparams2list(series_params))
561563
func_params_str = ', '.join(kwsparams2list(func_params))
562564
func_lines = [f"def _df_copy_impl(df, {func_params_str}):"]
563565
index = df_index_codegen_all(df)
564566
for i, c in enumerate(df.columns):
567+
col_loc = df.column_loc[c]
568+
type_id, col_id = col_loc.type_id, col_loc.col_id
565569
result_c = f"result_{i}"
566-
func_lines += [f" series_{i} = pandas.Series(df._data[{i}], name='{c}')",
570+
func_lines += [f" data_{i} = df._data[{type_id}][{col_id}]",
571+
f" series_{i} = pandas.Series(data_{i}, name='{c}')",
567572
f" {result_c} = series_{i}.copy({series_params_str})"]
568573
results.append((c, result_c))
569574

sdc/tests/test_dataframe.py

Lines changed: 21 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1362,7 +1362,7 @@ def impl(a):
13621362
)
13631363
pd.testing.assert_frame_equal(sdc_func(df), ref_impl(df))
13641364

1365-
@dfRefactoringNotImplemented
1365+
@dfRefactoringNotImplemented # required re-implementing DataFrame unboxing
13661366
def test_df_copy(self):
13671367
def test_impl(df, deep):
13681368
return df.copy(deep=deep)
@@ -1380,6 +1380,26 @@ def test_impl(df, deep):
13801380
with self.subTest(index=idx, deep=deep):
13811381
pd.testing.assert_frame_equal(sdc_func(df, deep), test_impl(df, deep))
13821382

1383+
@dfRefactoringNotImplemented # required re-implementing DataFrame boxing
1384+
def test_df_copy_no_unboxing(self):
1385+
def test_impl(idx, deep):
1386+
df = pd.DataFrame({
1387+
'A': [3.2, np.nan, 7.0, 3.3, np.nan],
1388+
'B': [3, 4, 1, 0, 222],
1389+
'C': [True, True, False, False, True],
1390+
'D': ['a', 'dd', 'c', '12', None]
1391+
}, index=idx)
1392+
return df.copy(deep=deep)
1393+
1394+
sdc_impl = sdc.jit(test_impl)
1395+
indexes = [[3, 4, 2, 6, 1], ['a', 'b', 'c', 'd', 'e'], None]
1396+
cases_deep = [None, True, False]
1397+
for idx, deep in product(indexes, cases_deep):
1398+
with self.subTest(index=idx, deep=deep):
1399+
jit_result = sdc_impl(idx, deep)
1400+
ref_result = test_impl(idx, deep)
1401+
pd.testing.assert_frame_equal(jit_result, ref_result)
1402+
13831403
@dfRefactoringNotImplemented # required re-implementing DataFrame boxing
13841404
def test_pct_change1(self):
13851405
def test_impl(n):

0 commit comments

Comments
 (0)