diff --git a/bigframes/dataframe.py b/bigframes/dataframe.py index c9df349556..7f7268a9d5 100644 --- a/bigframes/dataframe.py +++ b/bigframes/dataframe.py @@ -3924,6 +3924,10 @@ def map(self, func, na_action: Optional[str] = None) -> DataFrame: ) def apply(self, func, *, axis=0, args: typing.Tuple = (), **kwargs): + # With BigFrames remote functions, DataFrame '.apply' is designed for + # row-wise or column-wise operations, where the input to the applied + # function is a Series, not a scalar. + if utils.get_axis_number(axis) == 1: msg = "axis=1 scenario is in preview." warnings.warn(msg, category=bfe.PreviewWarning) @@ -4031,8 +4035,19 @@ def apply(self, func, *, axis=0, args: typing.Tuple = (), **kwargs): return result_series + # At this point only a column-wise or element-wise remote function + # operation remains, and neither is supported. + if hasattr(func, "bigframes_remote_function"): + raise NotImplementedError( + "BigFrames DataFrame '.apply()' does not support remote function " + "for column-wise (i.e. with axis=0) operations, please use a " + "regular python function instead. For element-wise operations of " + "the remote function, please use '.map()'." 
+ ) + # Per-column apply results = {name: func(col, *args, **kwargs) for name, col in self.items()} + if all( [ isinstance(val, bigframes.series.Series) or utils.is_list_like(val) diff --git a/tests/system/small/test_remote_function.py b/tests/system/small/test_remote_function.py index 808bf1c055..c3f3890459 100644 --- a/tests/system/small/test_remote_function.py +++ b/tests/system/small/test_remote_function.py @@ -968,6 +968,30 @@ def test_read_gbq_function_supported_python_output_type( rf.read_gbq_function(str(sql_routine.reference), session=session) +@pytest.mark.flaky(retries=2, delay=120) +def test_df_apply_scalar_func(session, scalars_dfs): + scalars_df, _ = scalars_dfs + bdf = bigframes.pandas.DataFrame( + { + "Column1": scalars_df["string_col"], + "Column2": scalars_df["string_col"], + } + ) + + # The "cw_lower_case_ascii_only" is a scalar function. + func_ref = session.read_gbq_function("bqutil.fn.cw_lower_case_ascii_only") + + # DataFrame '.apply()' only supports series level application. + with pytest.raises(NotImplementedError) as context: + bdf.apply(func_ref) + assert str(context.value) == ( + "BigFrames DataFrame '.apply()' does not support remote function for " + "column-wise (i.e. with axis=0) operations, please use a regular python " + "function instead. For element-wise operations of the remote function, " + "please use '.map()'." + ) + + @pytest.mark.flaky(retries=2, delay=120) def test_read_gbq_function_multiple_inputs_not_a_row_processor(session): with pytest.raises(ValueError) as context: