From 612415516e1d06b9dd33b6ed592279c7bb1f3e97 Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Thu, 18 Jun 2026 16:38:17 +0800 Subject: [PATCH 1/2] feat: add deprecation warning for Expr passed to literal-only args - Introduced shared `_warn_if_expr_for_literal_arg` in `functions/__init__.py` - Added `DeprecationWarning` for the following methods when `Expr` is passed as argument: - `encode(..., encoding=Expr)` - `decode(..., encoding=Expr)` - `digest(..., method=Expr)` - `arrow_cast(..., data_type=Expr)` - `arrow_try_cast(..., data_type=Expr)` - `arrow_metadata(..., key=Expr)` test: update tests to check for warnings - Implemented tests in `test_functions.py` to ensure: - Warning is raised for `Expr` form - No warning for native literal form --- python/datafusion/functions/__init__.py | 13 ++++++ python/tests/test_functions.py | 59 +++++++++++++++++++++++++ 2 files changed, 72 insertions(+) diff --git a/python/datafusion/functions/__init__.py b/python/datafusion/functions/__init__.py index 54783f086..fb6a607eb 100644 --- a/python/datafusion/functions/__init__.py +++ b/python/datafusion/functions/__init__.py @@ -73,6 +73,13 @@ def _warn_expr_for_literal_arg(function_name: str, arg_name: str) -> None: ) +def _warn_if_expr_for_literal_arg( + value: Any, function_name: str, arg_name: str +) -> None: + if isinstance(value, Expr): + _warn_expr_for_literal_arg(function_name, arg_name) + + __all__ = [ "abs", "acos", @@ -437,6 +444,7 @@ def encode(expr: Expr, encoding: Expr | str) -> Expr: >>> result.collect_column("enc")[0].as_py() 'aGVsbG8' """ + _warn_if_expr_for_literal_arg(encoding, "encode", "encoding") encoding = coerce_to_expr(encoding) return Expr(f.encode(expr.expr, encoding.expr)) @@ -452,6 +460,7 @@ def decode(expr: Expr, encoding: Expr | str) -> Expr: >>> result.collect_column("dec")[0].as_py() b'hello' """ + _warn_if_expr_for_literal_arg(encoding, "decode", "encoding") encoding = coerce_to_expr(encoding) return Expr(f.decode(expr.expr, encoding.expr)) @@ -742,6 +751,7 @@ def digest(value: Expr, method: Expr | str) -> Expr: >>> len(result.collect_column("d")[0].as_py()) > 0 True """ + _warn_if_expr_for_literal_arg(method, "digest", "method") method = coerce_to_expr(method) return Expr(f.digest(value.expr, method.expr)) @@ -3096,6 +3106,7 @@ def arrow_cast(expr: Expr, data_type: Expr | str | pa.DataType) -> Expr: >>> result.collect_column("c")[0].as_py() 1.0 """ + _warn_if_expr_for_literal_arg(data_type, "arrow_cast", "data_type") if isinstance(data_type, pa.DataType): return expr.cast(data_type) if isinstance(data_type, str): @@ -3128,6 +3139,7 @@ def arrow_try_cast(expr: Expr, data_type: Expr | str | pa.DataType) -> Expr: >>> result.collect_column("c")[0].as_py() is None True """ + _warn_if_expr_for_literal_arg(data_type, "arrow_try_cast", "data_type") if isinstance(data_type, pa.DataType): return expr.try_cast(data_type) if isinstance(data_type, str): @@ -3235,6 +3247,7 @@ def arrow_metadata(expr: Expr, key: Expr | str | None = None) -> Expr: """ if key is None: return Expr(f.arrow_metadata(expr.expr)) + _warn_if_expr_for_literal_arg(key, "arrow_metadata", "key") if isinstance(key, str): key = Expr.string_literal(key) return Expr(f.arrow_metadata(expr.expr, key.expr)) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 43dd70660..557ea107b 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -2355,6 +2355,65 @@ def test_regexp_replace_native(self): ).collect() assert result[0].column(0)[0].as_py() == "aX bX cX" + @pytest.mark.parametrize( + ("func", "arg_name", "expr"), + [ + pytest.param( + f.encode, + "encoding", + lambda: f.encode(column("a"), literal("base64")), + id="encode-encoding", + ), + pytest.param( + f.decode, + "encoding", + lambda: f.decode(column("a"), literal("base64")), + id="decode-encoding", + ), + pytest.param( + f.digest, + "method", + lambda: f.digest(column("a"), literal("sha256")), + id="digest-method", + ), + pytest.param( + f.arrow_cast, + "data_type", + lambda: f.arrow_cast(column("a"), literal("Float64")), + id="arrow-cast-data-type", + ), + pytest.param( + f.arrow_try_cast, + "data_type", + lambda: f.arrow_try_cast(column("a"), literal("Float64")), + id="arrow-try-cast-data-type", + ), + pytest.param( + f.arrow_metadata, + "key", + lambda: f.arrow_metadata(column("a"), literal("k")), + id="arrow-metadata-key", + ), + ], + ) + def test_literal_only_expr_args_warn_deprecated(self, func, arg_name, expr): + with pytest.warns( + DeprecationWarning, + match=rf"Passing Expr for {func.__name__}\(\) argument '{arg_name}' is deprecated", + ): + result = expr() + assert result is not None + + def test_literal_only_native_args_do_not_warn(self): + with warnings.catch_warnings(): + warnings.simplefilter("error", DeprecationWarning) + assert f.encode(column("a"), "base64") is not None + assert f.decode(column("a"), "base64") is not None + assert f.digest(column("a"), "sha256") is not None + assert f.arrow_cast(column("a"), "Float64") is not None + assert f.arrow_try_cast(column("a"), pa.float64()) is not None + assert f.arrow_metadata(column("a"), "k") is not None + def test_backward_compat_with_lit(self): """Verify that existing code using lit() still works.""" ctx = SessionContext() From 529f05d5273482c2961ea5c793dcdd647dc18e0f Mon Sep 17 00:00:00 2001 From: Siew Kam Onn Date: Thu, 18 Jun 2026 16:56:22 +0800 Subject: [PATCH 2/2] fix(tests): resolve E501 line length issue in test_functions.py --- python/tests/test_functions.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/tests/test_functions.py b/python/tests/test_functions.py index 557ea107b..3561fc947 100644 --- a/python/tests/test_functions.py +++ b/python/tests/test_functions.py @@ -2399,7 +2399,10 @@ def test_regexp_replace_native(self): def test_literal_only_expr_args_warn_deprecated(self, func, arg_name, expr): with pytest.warns( DeprecationWarning, - match=rf"Passing Expr for {func.__name__}\(\) argument '{arg_name}' is deprecated", + match=( + rf"Passing Expr for {func.__name__}\(\) argument " + rf"'{arg_name}' is deprecated" + ), ): result = expr() assert result is not None