Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions vortex-array/src/array/erased.rs
Original file line number Diff line number Diff line change
Expand Up @@ -321,6 +321,33 @@ impl ArrayRef {
}
}

/// Reports whether this array can be proven entirely null without running any compute.
///
/// This is the execution-free counterpart of [`Self::all_invalid`]. The answer is `true` only
/// in cases that can be decided from the array's static shape:
///
/// * the array is a constant whose scalar is null, or
/// * its validity is [`Validity::AllInvalid`], or
/// * its validity is a validity array that is itself a constant `false`.
///
/// Any other situation yields `false`, which means "could not prove all-null" rather than
/// "has at least one valid value". Callers must treat `false` as a conservative answer and
/// continue on their regular path.
///
/// Compute entry points call this to replace an entirely-null input with a `Constant(null)`
/// result and so avoid canonicalization.
///
/// # Errors
///
/// Propagates any error returned while fetching the array's validity.
pub fn definitely_all_null(&self) -> VortexResult<bool> {
    // A non-nullable dtype cannot contain nulls, so there is nothing to prove.
    if !self.dtype().is_nullable() {
        return Ok(false);
    }

    // Constant arrays are decided by their single scalar.
    if let Some(scalar) = self.as_constant() {
        return Ok(scalar.is_null());
    }

    let provably_null = match self.validity()? {
        Validity::AllInvalid => true,
        Validity::NonNullable | Validity::AllValid => false,
        Validity::Array(mask) => {
            // Only a constant validity array can be judged without execution; a null or
            // non-constant validity falls through to the conservative `false`.
            let constant_validity = mask.as_constant().and_then(|s| s.as_bool().value());
            matches!(constant_validity, Some(false))
        }
    };
    Ok(provably_null)
}

/// Returns the number of valid elements in the array.
pub fn valid_count(&self, ctx: &mut ExecutionCtx) -> VortexResult<usize> {
let len = self.len();
Expand Down Expand Up @@ -750,3 +777,65 @@ impl<V: VTable> Matcher for V {
Some(unsafe { ArrayView::new_unchecked(array, &inner.data) })
}
}

#[cfg(test)]
mod tests {
    use vortex_buffer::buffer;

    use crate::IntoArray;
    use crate::arrays::ConstantArray;
    use crate::arrays::PrimitiveArray;
    use crate::dtype::DType;
    use crate::dtype::Nullability;
    use crate::dtype::PType;
    use crate::scalar::Scalar;
    use crate::validity::Validity;

    /// A constant array is all-null exactly when its scalar is null.
    #[test]
    fn definitely_all_null_detects_constant_null() -> vortex_error::VortexResult<()> {
        let dtype = DType::Primitive(PType::I32, Nullability::Nullable);
        assert!(
            ConstantArray::null(dtype, 4)
                .into_array()
                .definitely_all_null()?
        );
        assert!(
            !ConstantArray::new(Scalar::primitive(1i32, Nullability::Nullable), 4)
                .into_array()
                .definitely_all_null()?
        );
        Ok(())
    }

    /// Non-constant arrays are decided purely from their validity representation.
    #[test]
    fn definitely_all_null_via_validity() -> vortex_error::VortexResult<()> {
        // AllInvalid validity on a concrete array.
        assert!(
            PrimitiveArray::new(buffer![0i32, 0, 0], Validity::AllInvalid)
                .into_array()
                .definitely_all_null()?
        );

        // A constant-`false` validity array: the representation all-null arrays will use once the
        // `AllInvalid` variant is removed.
        let const_false = Validity::Array(ConstantArray::new(false, 3).into_array());
        assert!(
            PrimitiveArray::new(buffer![0i32, 0, 0], const_false)
                .into_array()
                .definitely_all_null()?
        );

        // All-valid and non-nullable arrays are not all-null.
        assert!(
            !PrimitiveArray::new(buffer![1i32, 2, 3], Validity::AllValid)
                .into_array()
                .definitely_all_null()?
        );
        assert!(
            !PrimitiveArray::new(buffer![1i32, 2, 3], Validity::NonNullable)
                .into_array()
                .definitely_all_null()?
        );
        Ok(())
    }

    /// A constant-`true` validity array must not be reported as all-null. This pins the polarity
    /// of the constant-validity check: only a constant `false` validity proves all-null-ness.
    #[test]
    fn definitely_all_null_rejects_constant_true_validity() -> vortex_error::VortexResult<()> {
        let const_true = Validity::Array(ConstantArray::new(true, 3).into_array());
        assert!(
            !PrimitiveArray::new(buffer![1i32, 2, 3], const_true)
                .into_array()
                .definitely_all_null()?
        );
        Ok(())
    }
}
9 changes: 9 additions & 0 deletions vortex-array/src/arrays/constant/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use std::fmt::Formatter;
use crate::array::Array;
use crate::array::ArrayParts;
use crate::arrays::Constant;
use crate::dtype::DType;
use crate::scalar::Scalar;

#[derive(Clone, Debug)]
Expand Down Expand Up @@ -49,4 +50,12 @@ impl Array<Constant> {
let data = ConstantData::new(scalar);
unsafe { Array::from_parts_unchecked(ArrayParts::new(Constant, dtype, len, data)) }
}

/// Builds a constant array of `len` elements, every one of which is the null scalar of `dtype`.
///
/// This is the canonical way to represent an all-null array: one null scalar logically
/// repeated `len` times, with no values buffer and no validity child.
///
/// `dtype` is expected to be nullable.
/// NOTE(review): presumably `Scalar::null` rejects a non-nullable `dtype` — confirm before
/// passing one.
pub fn null(dtype: DType, len: usize) -> Self {
    let null_scalar = Scalar::null(dtype);
    Self::new(null_scalar, len)
}
}
10 changes: 4 additions & 6 deletions vortex-array/src/arrays/list/compute/filter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ use vortex_mask::MaskIter;
use vortex_mask::MaskValues;

use crate::ArrayRef;
use crate::Canonical;
use crate::ExecutionCtx;
use crate::IntoArray;
use crate::array::ArrayView;
Expand Down Expand Up @@ -108,11 +107,10 @@ impl FilterKernel for List {
Validity::NonNullable => Validity::NonNullable,
Validity::AllValid => Validity::AllValid,
Validity::AllInvalid => {
let elements = Canonical::empty(array.element_dtype()).into_array();
let offsets = ConstantArray::new(0u64, selection.true_count() + 1).into_array();
return Ok(Some(unsafe {
ListArray::new_unchecked(elements, offsets, Validity::AllInvalid).into_array()
}));
// The list is entirely null, so the filtered result is an all-null list.
return Ok(Some(
ConstantArray::null(array.dtype().clone(), selection.true_count()).into_array(),
));
}
Validity::Array(a) => Validity::Array(a.filter(mask.clone())?),
};
Expand Down
15 changes: 5 additions & 10 deletions vortex-array/src/arrays/struct_/compute/take.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,27 +6,22 @@ use vortex_error::VortexResult;
use crate::ArrayRef;
use crate::IntoArray;
use crate::array::ArrayView;
use crate::arrays::ConstantArray;
use crate::arrays::Struct;
use crate::arrays::StructArray;
use crate::arrays::dict::TakeReduce;
use crate::arrays::struct_::StructArrayExt;
use crate::builtins::ArrayBuiltins;
use crate::scalar::Scalar;
use crate::validity::Validity;

impl TakeReduce for Struct {
fn take(array: ArrayView<'_, Struct>, indices: &ArrayRef) -> VortexResult<Option<ArrayRef>> {
// If the struct array is empty then the indices must be all null, otherwise it will access
// an out of bounds element.
// an out of bounds element. The result is therefore an all-null struct.
if array.is_empty() {
return StructArray::try_new_with_dtype(
array.iter_unmasked_fields().cloned().collect::<Vec<_>>(),
array.struct_fields().clone(),
indices.len(),
Validity::AllInvalid,
)
.map(StructArray::into_array)
.map(Some);
return Ok(Some(
ConstantArray::null(array.dtype().as_nullable(), indices.len()).into_array(),
));
}

// TODO(connor): This could be bad for cache locality...
Expand Down
2 changes: 1 addition & 1 deletion vortex-array/src/scalar_fn/fns/fill_null/kernel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ pub(super) fn precondition(
}

// If all values are null, replace the entire array with the fill value.
if matches!(array.validity()?, Validity::AllInvalid) {
if array.definitely_all_null()? {
return Ok(Some(
ConstantArray::new(fill_value.clone(), array.len()).into_array(),
));
Expand Down
4 changes: 4 additions & 0 deletions vortex-array/src/scalar_fn/fns/is_not_null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,10 @@ impl ScalarFnVTable for IsNotNull {
_ctx: &mut ExecutionCtx,
) -> VortexResult<ArrayRef> {
let child = args.get(0)?;
// Cheap short-circuit: an entirely-null input is null everywhere.
if child.definitely_all_null()? {
return Ok(ConstantArray::new(false, args.row_count()).into_array());
}
match child.validity()? {
Validity::NonNullable | Validity::AllValid => {
Ok(ConstantArray::new(true, args.row_count()).into_array())
Expand Down
4 changes: 4 additions & 0 deletions vortex-array/src/scalar_fn/fns/is_null.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ impl ScalarFnVTable for IsNull {
if let Some(scalar) = child.as_constant() {
return Ok(ConstantArray::new(scalar.is_null(), args.row_count()).into_array());
}
// Cheap short-circuit: an entirely-null input is null everywhere.
if child.definitely_all_null()? {
return Ok(ConstantArray::new(true, args.row_count()).into_array());
}

match child.validity()? {
Validity::NonNullable | Validity::AllValid => {
Expand Down
10 changes: 0 additions & 10 deletions vortex-array/src/validity.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,16 +204,6 @@ impl Validity {
}
}

/// Returns the inverse of this validity.
///
/// [`Validity::AllValid`] and [`Validity::AllInvalid`] swap, and a validity array is negated
/// via its own `not()`. [`Validity::NonNullable`] has no invalid counterpart and is returned
/// unchanged.
///
/// # Errors
///
/// Propagates any error from negating the underlying validity array.
pub fn not(&self) -> VortexResult<Self> {
    Ok(match self {
        Validity::NonNullable => Validity::NonNullable,
        Validity::AllValid => Validity::AllInvalid,
        Validity::AllInvalid => Validity::AllValid,
        Validity::Array(arr) => Validity::Array(arr.not()?),
    })
}

/// Lazily filters a [`Validity`] with a selection mask, which keeps only the entries for which
/// the mask is true.
///
Expand Down