From 2887ea78beab874bdac669a881692840272a0eaf Mon Sep 17 00:00:00 2001 From: Joseph Isaacs Date: Wed, 17 Jun 2026 11:50:00 +0000 Subject: [PATCH 1/5] feat(array): add ArrayRef::all_null and ConstantArray::null Foundation for representing all-null arrays as Constant(null) and removing Validity::AllInvalid (#8443). - ConstantArray::null(dtype, len) constructs the canonical all-null array: a single null scalar repeated, with no values buffer or validity child. - ArrayRef::all_null() is a cheap, non-executing, conservative check for "entirely null": true for a constant-null array or a statically all-invalid validity (including a constant-false validity array, the representation all-null arrays will use once AllInvalid is gone). It runs no compute, so a false result means "not provably all-null", not "has valid values". Compute entry points will call all_null() to short-circuit an entirely-null input to Constant(null) and skip canonicalization. Signed-off-by: Joseph Isaacs https://claude.ai/code/session_01Q8K741TL4zABgsL1N4kLWw --- vortex-array/src/array/erased.rs | 85 +++++++++++++++++++++++ vortex-array/src/arrays/constant/array.rs | 9 +++ 2 files changed, 94 insertions(+) diff --git a/vortex-array/src/array/erased.rs b/vortex-array/src/array/erased.rs index bacda957660..827d68ff337 100644 --- a/vortex-array/src/array/erased.rs +++ b/vortex-array/src/array/erased.rs @@ -321,6 +321,33 @@ impl ArrayRef { } } + /// Returns `true` if this array is *cheaply* known to be entirely null. + /// + /// Unlike [`Self::all_invalid`], this performs no execution. It returns `true` only when + /// all-null-ness can be proven without running compute: a constant-null array, or a validity + /// that is statically all-invalid (including a constant-`false` validity array). A `false` + /// result therefore means "not provably all-null", *not* "contains valid values"; callers must + /// treat it as a conservative signal and fall back to their normal path. + /// + /// Compute entry points use this to short-circuit an entirely-null input to a `Constant(null)` + /// result, skipping canonicalization. + pub fn all_null(&self) -> VortexResult { + if !self.dtype().is_nullable() { + return Ok(false); + } + if let Some(scalar) = self.as_constant() { + return Ok(scalar.is_null()); + } + Ok(match self.validity()? { + Validity::NonNullable | Validity::AllValid => false, + Validity::AllInvalid => true, + Validity::Array(validity) => validity + .as_constant() + .and_then(|s| s.as_bool().value()) + .is_some_and(|valid| !valid), + }) + } + /// Returns the number of valid elements in the array. pub fn valid_count(&self, ctx: &mut ExecutionCtx) -> VortexResult { let len = self.len(); @@ -750,3 +777,61 @@ impl Matcher for V { Some(unsafe { ArrayView::new_unchecked(array, &inner.data) }) } } + +#[cfg(test)] +mod tests { + use vortex_buffer::buffer; + + use crate::IntoArray; + use crate::arrays::ConstantArray; + use crate::arrays::PrimitiveArray; + use crate::dtype::DType; + use crate::dtype::Nullability; + use crate::dtype::PType; + use crate::scalar::Scalar; + use crate::validity::Validity; + + #[test] + fn all_null_detects_constant_null() -> vortex_error::VortexResult<()> { + let dtype = DType::Primitive(PType::I32, Nullability::Nullable); + assert!(ConstantArray::null(dtype, 4).into_array().all_null()?); + assert!( + !ConstantArray::new(Scalar::primitive(1i32, Nullability::Nullable), 4) + .into_array() + .all_null()? + ); + Ok(()) + } + + #[test] + fn all_null_via_validity() -> vortex_error::VortexResult<()> { + // AllInvalid validity on a concrete array. + assert!( + PrimitiveArray::new(buffer![0i32, 0, 0], Validity::AllInvalid) + .into_array() + .all_null()? + ); + + // A constant-`false` validity array: the representation all-null arrays will use once the + // `AllInvalid` variant is removed. + let const_false = Validity::Array(ConstantArray::new(false, 3).into_array()); + assert!( + PrimitiveArray::new(buffer![0i32, 0, 0], const_false) + .into_array() + .all_null()? + ); + + // All-valid and non-nullable arrays are not all-null. + assert!( + !PrimitiveArray::new(buffer![1i32, 2, 3], Validity::AllValid) + .into_array() + .all_null()? + ); + assert!( + !PrimitiveArray::new(buffer![1i32, 2, 3], Validity::NonNullable) + .into_array() + .all_null()? + ); + Ok(()) + } +} diff --git a/vortex-array/src/arrays/constant/array.rs b/vortex-array/src/arrays/constant/array.rs index c6b47cd0a17..17d5e2e7ca1 100644 --- a/vortex-array/src/arrays/constant/array.rs +++ b/vortex-array/src/arrays/constant/array.rs @@ -7,6 +7,7 @@ use std::fmt::Formatter; use crate::array::Array; use crate::array::ArrayParts; use crate::arrays::Constant; +use crate::dtype::DType; use crate::scalar::Scalar; #[derive(Clone, Debug)] @@ -49,4 +50,12 @@ impl Array { let data = ConstantData::new(scalar); unsafe { Array::from_parts_unchecked(ArrayParts::new(Constant, dtype, len, data)) } } + + /// Construct an entirely-null constant array of the given nullable `dtype` and `len`. + /// + /// This is the canonical representation of an all-null array: a single null scalar repeated + /// `len` times, carrying neither a values buffer nor a validity child. + pub fn null(dtype: DType, len: usize) -> Self { + Self::new(Scalar::null(dtype), len) + } } From c439bbb5b1d43e1f444e6aa788f3d6078e05dafa Mon Sep 17 00:00:00 2001 From: Joseph Isaacs Date: Wed, 17 Jun 2026 13:52:29 +0000 Subject: [PATCH 2/5] refactor(validity): remove dead Validity::not Validity::not had no callers anywhere in the workspace: a repo-wide audit of every `.not()` site found only Mask, BitBuffer, and ArrayRef receivers, with no UFCS Validity::not call and no `impl Not for Validity`. It is also the only place that constructs Validity::AllInvalid without a length in scope (AllValid -> AllInvalid). Removing it eliminates the one structural blocker to deleting the AllInvalid variant (#8443): every remaining producer already has a length, so no length-threading through the Validity algebra is required. Signed-off-by: Joseph Isaacs https://claude.ai/code/session_01Q8K741TL4zABgsL1N4kLWw --- vortex-array/src/validity.rs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/vortex-array/src/validity.rs b/vortex-array/src/validity.rs index 8d36c07405e..76adae6bf01 100644 --- a/vortex-array/src/validity.rs +++ b/vortex-array/src/validity.rs @@ -204,16 +204,6 @@ impl Validity { } } - // Invert the validity - pub fn not(&self) -> VortexResult { - match self { - Validity::NonNullable => Ok(Validity::NonNullable), - Validity::AllValid => Ok(Validity::AllInvalid), - Validity::AllInvalid => Ok(Validity::AllValid), - Validity::Array(arr) => Ok(Validity::Array(arr.not()?)), - } - } - /// Lazily filters a [`Validity`] with a selection mask, which keeps only the entries for which /// the mask is true. /// From 394951eeab6d75f0db035c8d0000851bf018adc0 Mon Sep 17 00:00:00 2001 From: Joseph Isaacs Date: Wed, 17 Jun 2026 14:34:06 +0000 Subject: [PATCH 3/5] refactor(array): wire core producers/entry-points to Constant(null) (#8443) Step 3 of removing Validity::AllInvalid. Exercises the foundation helpers: - list filter and struct take now return ConstantArray::null(...) for the all-null result instead of constructing an all-null concrete array. - is_null / is_not_null gain a cheap ArrayRef::all_null() short-circuit for entirely-null concrete inputs (the constant-input case is already handled). All changes are logically behavior-preserving: an all-null result is the same values and null mask whether encoded as Constant(null) or a concrete array. This also confirms the previously-unused all_null() and ConstantArray::null helpers now have real (non-test) call sites. Signed-off-by: Joseph Isaacs https://claude.ai/code/session_01Q8K741TL4zABgsL1N4kLWw --- vortex-array/src/arrays/list/compute/filter.rs | 10 ++++------ vortex-array/src/arrays/struct_/compute/take.rs | 15 +++++---------- vortex-array/src/scalar_fn/fns/is_not_null.rs | 4 ++++ vortex-array/src/scalar_fn/fns/is_null.rs | 4 ++++ 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/vortex-array/src/arrays/list/compute/filter.rs b/vortex-array/src/arrays/list/compute/filter.rs index 2b98e3c36d1..8dbb9be89b8 100644 --- a/vortex-array/src/arrays/list/compute/filter.rs +++ b/vortex-array/src/arrays/list/compute/filter.rs @@ -13,7 +13,6 @@ use vortex_mask::MaskIter; use vortex_mask::MaskValues; use crate::ArrayRef; -use crate::Canonical; use crate::ExecutionCtx; use crate::IntoArray; use crate::array::ArrayView; @@ -108,11 +107,10 @@ impl FilterKernel for List { Validity::NonNullable => Validity::NonNullable, Validity::AllValid => Validity::AllValid, Validity::AllInvalid => { - let elements = Canonical::empty(array.element_dtype()).into_array(); - let offsets = ConstantArray::new(0u64, selection.true_count() + 1).into_array(); - return Ok(Some(unsafe { - ListArray::new_unchecked(elements, offsets, Validity::AllInvalid).into_array() - })); + // The list is entirely null, so the filtered result is an all-null list. + return Ok(Some( + ConstantArray::null(array.dtype().clone(), selection.true_count()).into_array(), + )); } Validity::Array(a) => Validity::Array(a.filter(mask.clone())?), }; diff --git a/vortex-array/src/arrays/struct_/compute/take.rs b/vortex-array/src/arrays/struct_/compute/take.rs index a63c6c85cf6..9850ae14065 100644 --- a/vortex-array/src/arrays/struct_/compute/take.rs +++ b/vortex-array/src/arrays/struct_/compute/take.rs @@ -6,27 +6,22 @@ use vortex_error::VortexResult; use crate::ArrayRef; use crate::IntoArray; use crate::array::ArrayView; +use crate::arrays::ConstantArray; use crate::arrays::Struct; use crate::arrays::StructArray; use crate::arrays::dict::TakeReduce; use crate::arrays::struct_::StructArrayExt; use crate::builtins::ArrayBuiltins; use crate::scalar::Scalar; -use crate::validity::Validity; impl TakeReduce for Struct { fn take(array: ArrayView<'_, Struct>, indices: &ArrayRef) -> VortexResult> { // If the struct array is empty then the indices must be all null, otherwise it will access - // an out of bounds element. + // an out of bounds element. The result is therefore an all-null struct. if array.is_empty() { - return StructArray::try_new_with_dtype( - array.iter_unmasked_fields().cloned().collect::>(), - array.struct_fields().clone(), - indices.len(), - Validity::AllInvalid, - ) - .map(StructArray::into_array) - .map(Some); + return Ok(Some( + ConstantArray::null(array.dtype().as_nullable(), indices.len()).into_array(), + )); } // TODO(connor): This could be bad for cache locality... diff --git a/vortex-array/src/scalar_fn/fns/is_not_null.rs b/vortex-array/src/scalar_fn/fns/is_not_null.rs index 589333304e2..c74e16df82b 100644 --- a/vortex-array/src/scalar_fn/fns/is_not_null.rs +++ b/vortex-array/src/scalar_fn/fns/is_not_null.rs @@ -84,6 +84,10 @@ impl ScalarFnVTable for IsNotNull { _ctx: &mut ExecutionCtx, ) -> VortexResult { let child = args.get(0)?; + // Cheap short-circuit: an entirely-null input is null everywhere. + if child.all_null()? { + return Ok(ConstantArray::new(false, args.row_count()).into_array()); + } match child.validity()? { Validity::NonNullable | Validity::AllValid => { Ok(ConstantArray::new(true, args.row_count()).into_array()) diff --git a/vortex-array/src/scalar_fn/fns/is_null.rs b/vortex-array/src/scalar_fn/fns/is_null.rs index 7315fbe8c07..2e8cf975c40 100644 --- a/vortex-array/src/scalar_fn/fns/is_null.rs +++ b/vortex-array/src/scalar_fn/fns/is_null.rs @@ -74,6 +74,10 @@ impl ScalarFnVTable for IsNull { if let Some(scalar) = child.as_constant() { return Ok(ConstantArray::new(scalar.is_null(), args.row_count()).into_array()); } + // Cheap short-circuit: an entirely-null input is null everywhere. + if child.all_null()? { + return Ok(ConstantArray::new(true, args.row_count()).into_array()); + } match child.validity()? { Validity::NonNullable | Validity::AllValid => { From 803e530f9c6aa8c0a738549890ecf88470af19e1 Mon Sep 17 00:00:00 2001 From: Joseph Isaacs Date: Wed, 17 Jun 2026 14:46:19 +0000 Subject: [PATCH 4/5] refactor(array): rename all_null to definitely_all_null (#8443) The check is conservative and non-executing: a false result means "not provably all-null", not "has valid values". Rename to definitely_all_null to make that contract explicit and mirror the existing Validity::definitely_no_nulls. Signed-off-by: Joseph Isaacs https://claude.ai/code/session_01Q8K741TL4zABgsL1N4kLWw --- vortex-array/src/array/erased.rs | 22 +++++++++++-------- vortex-array/src/scalar_fn/fns/is_not_null.rs | 2 +- vortex-array/src/scalar_fn/fns/is_null.rs | 2 +- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/vortex-array/src/array/erased.rs b/vortex-array/src/array/erased.rs index 827d68ff337..f0bb806ee01 100644 --- a/vortex-array/src/array/erased.rs +++ b/vortex-array/src/array/erased.rs @@ -331,7 +331,7 @@ impl ArrayRef { /// /// Compute entry points use this to short-circuit an entirely-null input to a `Constant(null)` /// result, skipping canonicalization. - pub fn all_null(&self) -> VortexResult { + pub fn definitely_all_null(&self) -> VortexResult { if !self.dtype().is_nullable() { return Ok(false); } @@ -792,24 +792,28 @@ mod tests { use crate::validity::Validity; #[test] - fn all_null_detects_constant_null() -> vortex_error::VortexResult<()> { + fn definitely_all_null_detects_constant_null() -> vortex_error::VortexResult<()> { let dtype = DType::Primitive(PType::I32, Nullability::Nullable); - assert!(ConstantArray::null(dtype, 4).into_array().all_null()?); + assert!( + ConstantArray::null(dtype, 4) + .into_array() + .definitely_all_null()? + ); assert!( !ConstantArray::new(Scalar::primitive(1i32, Nullability::Nullable), 4) .into_array() - .all_null()? + .definitely_all_null()? ); Ok(()) } #[test] - fn all_null_via_validity() -> vortex_error::VortexResult<()> { + fn definitely_all_null_via_validity() -> vortex_error::VortexResult<()> { // AllInvalid validity on a concrete array. assert!( PrimitiveArray::new(buffer![0i32, 0, 0], Validity::AllInvalid) .into_array() - .all_null()? + .definitely_all_null()? ); // A constant-`false` validity array: the representation all-null arrays will use once the @@ -818,19 +822,19 @@ mod tests { assert!( PrimitiveArray::new(buffer![0i32, 0, 0], const_false) .into_array() - .all_null()? + .definitely_all_null()? ); // All-valid and non-nullable arrays are not all-null. assert!( !PrimitiveArray::new(buffer![1i32, 2, 3], Validity::AllValid) .into_array() - .all_null()? + .definitely_all_null()? ); assert!( !PrimitiveArray::new(buffer![1i32, 2, 3], Validity::NonNullable) .into_array() - .all_null()? + .definitely_all_null()? ); Ok(()) } diff --git a/vortex-array/src/scalar_fn/fns/is_not_null.rs b/vortex-array/src/scalar_fn/fns/is_not_null.rs index c74e16df82b..352db68aa39 100644 --- a/vortex-array/src/scalar_fn/fns/is_not_null.rs +++ b/vortex-array/src/scalar_fn/fns/is_not_null.rs @@ -85,7 +85,7 @@ impl ScalarFnVTable for IsNotNull { ) -> VortexResult { let child = args.get(0)?; // Cheap short-circuit: an entirely-null input is null everywhere. - if child.all_null()? { + if child.definitely_all_null()? { return Ok(ConstantArray::new(false, args.row_count()).into_array()); } match child.validity()? { diff --git a/vortex-array/src/scalar_fn/fns/is_null.rs b/vortex-array/src/scalar_fn/fns/is_null.rs index 2e8cf975c40..5d7c0e29cba 100644 --- a/vortex-array/src/scalar_fn/fns/is_null.rs +++ b/vortex-array/src/scalar_fn/fns/is_null.rs @@ -75,7 +75,7 @@ impl ScalarFnVTable for IsNull { return Ok(ConstantArray::new(scalar.is_null(), args.row_count()).into_array()); } // Cheap short-circuit: an entirely-null input is null everywhere. - if child.all_null()? { + if child.definitely_all_null()? { return Ok(ConstantArray::new(true, args.row_count()).into_array()); } From 051d17f83e40d35c05f9a120e9ae6e7a2447def9 Mon Sep 17 00:00:00 2001 From: Joseph Isaacs Date: Wed, 17 Jun 2026 15:18:52 +0000 Subject: [PATCH 5/5] refactor(array): decouple fill_null all-null path via definitely_all_null (#8443) Replace the explicit `matches!(validity, Validity::AllInvalid)` check in the fill_null precondition with `array.definitely_all_null()?`. Behavior-preserving and slightly more general: it also short-circuits a constant-null input or a constant-false validity array (the representations all-null arrays move to), without matching the variant directly. Prepares the consumer for the eventual .validity() pivot. Signed-off-by: Joseph Isaacs https://claude.ai/code/session_01Q8K741TL4zABgsL1N4kLWw --- vortex-array/src/scalar_fn/fns/fill_null/kernel.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vortex-array/src/scalar_fn/fns/fill_null/kernel.rs b/vortex-array/src/scalar_fn/fns/fill_null/kernel.rs index eea3dd6ef7b..aec175c566b 100644 --- a/vortex-array/src/scalar_fn/fns/fill_null/kernel.rs +++ b/vortex-array/src/scalar_fn/fns/fill_null/kernel.rs @@ -79,7 +79,7 @@ pub(super) fn precondition( } // If all values are null, replace the entire array with the fill value. - if matches!(array.validity()?, Validity::AllInvalid) { + if array.definitely_all_null()? { return Ok(Some( ConstantArray::new(fill_value.clone(), array.len()).into_array(), ));