From 7c8c742879a9128177c07740f020e19d4855d393 Mon Sep 17 00:00:00 2001 From: linfeng <33561138+lyne7-sc@users.noreply.github.com> Date: Tue, 30 Jun 2026 17:43:51 +0800 Subject: [PATCH 1/4] test: add slt for cardinality with nested arrays --- .../test_files/array/cardinality.slt | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/datafusion/sqllogictest/test_files/array/cardinality.slt b/datafusion/sqllogictest/test_files/array/cardinality.slt index 52b1a2b5445d9..f6d7623a111ae 100644 --- a/datafusion/sqllogictest/test_files/array/cardinality.slt +++ b/datafusion/sqllogictest/test_files/array/cardinality.slt @@ -51,6 +51,27 @@ select cardinality(arrow_cast([[1, 2], [3, 4], [5, 6]], 'FixedSizeList(3, List(I ---- 6 +# cardinality counts actual leaf elements in ragged nested arrays +query III +select cardinality([[1], [2, 3]]), + cardinality([[1, 2, 3], []]), + cardinality([[], [1, 2]]); +---- +3 3 2 + +query I +select cardinality(arrow_cast([[1], [2, 3]], 'LargeList(List(Int64))')); +---- +3 + +query IIII +select cardinality([[NULL], [1, 2]]), + cardinality([[[1]], [[2, 3], []]]), + cardinality(arrow_cast([[], [1, 2]], 'LargeList(List(Int64))')), + cardinality(make_array(NULL::int[], [1, 2])); +---- +3 3 2 2 + # cardinality scalar function #3 query II select cardinality(make_array()), cardinality(make_array(make_array())) From 8436b02aa09372eb80146612ebaf5fb08146509b Mon Sep 17 00:00:00 2001 From: linfeng <33561138+lyne7-sc@users.noreply.github.com> Date: Tue, 30 Jun 2026 17:47:49 +0800 Subject: [PATCH 2/4] fix: count ragged nested array cardinality correctly --- .../functions-nested/src/cardinality.rs | 38 +++++++++++++++---- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/datafusion/functions-nested/src/cardinality.rs b/datafusion/functions-nested/src/cardinality.rs index d21bb72a457a8..1368f30ca8f90 100644 --- a/datafusion/functions-nested/src/cardinality.rs +++ b/datafusion/functions-nested/src/cardinality.rs @@ -146,14 +146,38 @@ fn generic_list_cardinality( let result = array .iter() .map(|arr| match arr { - Some(arr) if arr.is_empty() => Ok(Some(0u64)), - arr => match crate::utils::compute_array_dims(arr)? { - Some(vector) => { - Ok(Some(vector.iter().map(|x| x.unwrap()).product::())) - } - None => Ok(None), - }, + Some(arr) => value_cardinality(arr).map(Some), + None => Ok(None), }) .collect::>()?; Ok(Arc::new(result) as ArrayRef) } + +fn value_cardinality(array: ArrayRef) -> Result { + match array.data_type() { + List(_) => { + let list = as_list_array(&array)?; + sum_list_cardinality(list.iter()) + } + LargeList(_) => { + let list = as_large_list_array(&array)?; + sum_list_cardinality(list.iter()) + } + _ => Ok(array.len() as u64), + } +} + +fn sum_list_cardinality(mut iter: I) -> Result +where + I: Iterator>, +{ + iter.try_fold(0u64, |total, arr| { + let value_count = match arr { + Some(arr) => value_cardinality(arr)?, + None => 0, + }; + total.checked_add(value_count).ok_or_else(|| { + datafusion_common::exec_datafusion_err!("cardinality overflowed u64") + }) + }) +} From 3be9d99f7d3ff2e2d8a888870b3d3e7e910bdecd Mon Sep 17 00:00:00 2001 From: linfeng <33561138+lyne7-sc@users.noreply.github.com> Date: Wed, 1 Jul 2026 10:36:29 +0800 Subject: [PATCH 3/4] recurse through nested ListView and FixedSizeList cardinality --- .../functions-nested/src/cardinality.rs | 27 +++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/datafusion/functions-nested/src/cardinality.rs b/datafusion/functions-nested/src/cardinality.rs index 1368f30ca8f90..38def2d4e4afe 100644 --- a/datafusion/functions-nested/src/cardinality.rs +++ b/datafusion/functions-nested/src/cardinality.rs @@ -23,10 +23,15 @@ use arrow::array::{ }; use arrow::datatypes::{ DataType, - DataType::{LargeList, List, Map, Null, UInt64}, + DataType::{ + FixedSizeList, LargeList, LargeListView, List, ListView, Map, Null, UInt64, + }, }; use datafusion_common::Result; -use datafusion_common::cast::{as_large_list_array, as_list_array, as_map_array}; +use datafusion_common::cast::{ + as_fixed_size_list_array, as_large_list_array, as_large_list_view_array, + as_list_array, as_list_view_array, as_map_array, +}; use datafusion_common::exec_err; use datafusion_common::utils::{ListCoercion, take_function_args}; use datafusion_expr::{ @@ -146,14 +151,14 @@ fn generic_list_cardinality( let result = array .iter() .map(|arr| match arr { - Some(arr) => value_cardinality(arr).map(Some), + Some(arr) => value_cardinality(&arr).map(Some), None => Ok(None), }) .collect::>()?; Ok(Arc::new(result) as ArrayRef) } -fn value_cardinality(array: ArrayRef) -> Result { +fn value_cardinality(array: &ArrayRef) -> Result { match array.data_type() { List(_) => { let list = as_list_array(&array)?; @@ -163,6 +168,18 @@ fn value_cardinality(array: ArrayRef) -> Result { let list = as_large_list_array(&array)?; sum_list_cardinality(list.iter()) } + ListView(_) => { + let list = as_list_view_array(&array)?; + sum_list_cardinality(list.iter()) + } + LargeListView(_) => { + let list = as_large_list_view_array(&array)?; + sum_list_cardinality(list.iter()) + } + FixedSizeList(..) => { + let list = as_fixed_size_list_array(&array)?; + sum_list_cardinality(list.iter()) + } _ => Ok(array.len() as u64), } } @@ -173,7 +190,7 @@ where { iter.try_fold(0u64, |total, arr| { let value_count = match arr { - Some(arr) => value_cardinality(arr)?, + Some(arr) => value_cardinality(&arr)?, None => 0, }; total.checked_add(value_count).ok_or_else(|| { From a0754c0f468bb8b0c48b2c584cae14e978eac322 Mon Sep 17 00:00:00 2001 From: linfeng <33561138+lyne7-sc@users.noreply.github.com> Date: Wed, 1 Jul 2026 10:38:10 +0800 Subject: [PATCH 4/4] mixed nested list cardinality --- .../test_files/array/cardinality.slt | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/datafusion/sqllogictest/test_files/array/cardinality.slt b/datafusion/sqllogictest/test_files/array/cardinality.slt index f6d7623a111ae..c0ad54e97a8f3 100644 --- a/datafusion/sqllogictest/test_files/array/cardinality.slt +++ b/datafusion/sqllogictest/test_files/array/cardinality.slt @@ -59,10 +59,20 @@ select cardinality([[1], [2, 3]]), ---- 3 3 2 -query I -select cardinality(arrow_cast([[1], [2, 3]], 'LargeList(List(Int64))')); +query IIII +select cardinality(arrow_cast([[1], [2, 3]], 'ListView(List(Int64))')), + cardinality(arrow_cast([[1], [2, 3]], 'LargeListView(List(Int64))')), + cardinality(arrow_cast([[1, 2], [3, 4]], 'List(FixedSizeList(2, Int64))')), + cardinality(arrow_cast([[1], [2, 3]], 'LargeList(List(Int64))')); +---- +3 3 4 3 + +query III +select cardinality(arrow_cast([[[1]], [[2, 3], []]], 'List(ListView(List(Int64)))')), + cardinality(arrow_cast([[[1]], [[2, 3], []]], 'List(LargeListView(List(Int64)))')), + cardinality(arrow_cast([[[1, 2]], [[3, 4]]], 'List(List(FixedSizeList(2, Int64)))')); ---- -3 +3 3 4 query IIII select cardinality([[NULL], [1, 2]]),