From 1acaae1cdd168de680c157d92d46eee10cdccaea Mon Sep 17 00:00:00 2001 From: Arwa Date: Mon, 30 Sep 2024 14:59:08 -0500 Subject: [PATCH 1/5] fix: fix generic error message when entering incorrect column name --- bigframes/core/groupby/__init__.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/bigframes/core/groupby/__init__.py b/bigframes/core/groupby/__init__.py index 5cb0e65729..aefae89ce1 100644 --- a/bigframes/core/groupby/__init__.py +++ b/bigframes/core/groupby/__init__.py @@ -88,9 +88,12 @@ def __getitem__( keys = list(key) else: keys = [key] - columns = [ - col_id for col_id, label in self._col_id_labels.items() if label in keys - ] + + for col_id, label in self._col_id_labels.items(): + if label in keys: + columns = [col_id] + else: + raise KeyError(f"Columns not found: {str(keys)[1:-1]}") if len(columns) > 1 or (not self._as_index): return DataFrameGroupBy( From d87b6cd09f842514958e4e484f0910f0569f4e7d Mon Sep 17 00:00:00 2001 From: Arwa Date: Mon, 30 Sep 2024 16:49:20 -0500 Subject: [PATCH 2/5] fix varibale initialization --- bigframes/core/groupby/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bigframes/core/groupby/__init__.py b/bigframes/core/groupby/__init__.py index aefae89ce1..91f5921a38 100644 --- a/bigframes/core/groupby/__init__.py +++ b/bigframes/core/groupby/__init__.py @@ -89,9 +89,11 @@ def __getitem__( else: keys = [key] + columns = [] + for col_id, label in self._col_id_labels.items(): if label in keys: - columns = [col_id] + columns.append(col_id) else: raise KeyError(f"Columns not found: {str(keys)[1:-1]}") From 30020ba89787c30a1b5e3a5946237b76c0b75641 Mon Sep 17 00:00:00 2001 From: Arwa Date: Wed, 2 Oct 2024 11:24:35 -0500 Subject: [PATCH 3/5] Add a test case --- tests/system/small/test_groupby.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/system/small/test_groupby.py b/tests/system/small/test_groupby.py index 8574860daa..3117c121aa 100644 --- a/tests/system/small/test_groupby.py +++ b/tests/system/small/test_groupby.py @@ -421,6 +421,20 @@ def test_dataframe_groupby_getitem( pd.testing.assert_series_equal(pd_result, bf_result, check_dtype=False) +def test_dataframe_groupby_getitem_error( + scalars_df_index, + scalars_pandas_df_index, +): + col_names = ["float64_col", "int64_col", "bool_col", "string_col"] + with pytest.raises(KeyError, match="\"Columns not found: 'not_in_group'\""): + ( + scalars_df_index[col_names] + .groupby("string_col")["not_in_group"] + .min() + .to_pandas() + ) + + def test_dataframe_groupby_getitem_list( scalars_df_index, scalars_pandas_df_index, From 71b23cc37da369749a26a910cf631b1dad1150c4 Mon Sep 17 00:00:00 2001 From: Arwa Date: Thu, 3 Oct 2024 14:28:00 -0500 Subject: [PATCH 4/5] Update code and fix failling tests --- bigframes/core/groupby/__init__.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/bigframes/core/groupby/__init__.py b/bigframes/core/groupby/__init__.py index 91f5921a38..2d351cf82d 100644 --- a/bigframes/core/groupby/__init__.py +++ b/bigframes/core/groupby/__init__.py @@ -89,13 +89,14 @@ def __getitem__( else: keys = [key] - columns = [] + bad_keys = [key for key in keys if key not in self._block.column_labels] - for col_id, label in self._col_id_labels.items(): - if label in keys: - columns.append(col_id) - else: - raise KeyError(f"Columns not found: {str(keys)[1:-1]}") + if len(bad_keys) > 0: + raise KeyError(f"Columns not found: {str(bad_keys)[1:-1]}") + + columns = [ + col_id for col_id, label in self._col_id_labels.items() if label in keys + ] if len(columns) > 1 or (not self._as_index): return DataFrameGroupBy( From 04d23914cf0ad11b311f9a220e5e3e3cb813b2c8 Mon Sep 17 00:00:00 2001 From: Arwa Date: Thu, 3 Oct 2024 15:50:38 -0500 Subject: [PATCH 5/5] Add test case for multiple columns --- tests/system/small/test_groupby.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/system/small/test_groupby.py b/tests/system/small/test_groupby.py index 3117c121aa..2d5ae21bb4 100644 --- a/tests/system/small/test_groupby.py +++ b/tests/system/small/test_groupby.py @@ -435,6 +435,20 @@ def test_dataframe_groupby_getitem_error( ) +def test_dataframe_groupby_getitem_multiple_columns_error( + scalars_df_index, + scalars_pandas_df_index, +): + col_names = ["float64_col", "int64_col", "bool_col", "string_col"] + with pytest.raises(KeyError, match="\"Columns not found: 'col1', 'col2'\""): + ( + scalars_df_index[col_names] + .groupby("string_col")["col1", "col2"] + .min() + .to_pandas() + ) + + def test_dataframe_groupby_getitem_list( scalars_df_index, scalars_pandas_df_index,