Skip to content

Commit 2ef4d4c

Browse files
committed
first commit set up scaffolding
Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
1 parent ca770ed commit 2ef4d4c

File tree

5 files changed

+207
-8
lines changed

5 files changed

+207
-8
lines changed
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use std::mem::MaybeUninit;
5+
6+
use vortex_buffer::BufferMut;
7+
use vortex_compute::filter::Filter;
8+
use vortex_dtype::match_each_integer_ptype;
9+
use vortex_mask::Mask;
10+
use vortex_vector::VectorMutOps;
11+
use vortex_vector::primitive::{PVector, PVectorMut, PrimitiveVector};
12+
13+
use crate::BitPackedArray;
14+
use crate::bitpacking::array::BitPacked;
15+
16+
/// Returns the selection-density cutoff used to choose between the two unpacking strategies.
///
/// [`write_unpacked_to_buffer`] compares a selection mask's density against this value: a mask
/// whose density is strictly below it is filtered while unpacking, and any other mask causes the
/// entire [`BitPackedArray`] to be unpacked first and filtered afterwards.
///
/// The returned value depends only on `size_of::<T>()`.
const fn unpack_then_filter_threshold<T>() -> f64 {
    // TODO(connor): Where did these numbers come from? Add a public link after validating them.
    // These numbers probably don't work for in-place filtering either.
    //
    // >8 bytes may have a higher threshold. These numbers are derived from a GCP c2-standard-4
    // with a "Cascade Lake" CPU.
    const ONE_OR_TWO_BYTES: f64 = 0.03;
    const FOUR_BYTES: f64 = 0.075;
    const ANY_OTHER_WIDTH: f64 = 0.09;

    let width_in_bytes = size_of::<T>();
    match width_in_bytes {
        1 | 2 => ONE_OR_TWO_BYTES,
        4 => FOUR_BYTES,
        _ => ANY_OTHER_WIDTH,
    }
}
30+
31+
/// Helper function to get the true count of a mask with a default if it doesn't exist.
32+
fn true_count_of_optional_mask(selection_mask: &Option<&Mask>, default: usize) -> usize {
33+
selection_mask
34+
.map(|mask| mask.true_count())
35+
.unwrap_or(default)
36+
}
37+
38+
/// Given a [`BitPackedArray`], unpacks all bitpacked values and creates a new [`PrimitiveVector`].
39+
///
40+
/// If a selection mask is passed in, the resultant vector will have the specified values filtered
41+
/// out.
42+
///
43+
/// Internally, this calls [`unpack_into_pvector`].
44+
pub fn unpack_into_vector(
45+
array: &BitPackedArray,
46+
selection_mask: Option<&Mask>,
47+
) -> PrimitiveVector {
48+
match_each_integer_ptype!(array.ptype(), |T| {
49+
unpack_into_pvector::<T>(array, selection_mask).into()
50+
})
51+
}
52+
53+
/// Given a [`BitPackedArray`] and a template type `T: BitPacked`, unpacks all bitpacked values and
54+
/// creates a new [`PVector<T>`].
55+
///
56+
/// If a selection mask is passed in, the resultant vector will have the specified values filtered
57+
/// out.
58+
///
59+
/// Internally, this calls [`write_unpacked_to_pvector`].
60+
pub fn unpack_into_pvector<T: BitPacked>(
61+
array: &BitPackedArray,
62+
selection_mask: Option<&Mask>,
63+
) -> PVector<T> {
64+
let num_new_values = true_count_of_optional_mask(&selection_mask, array.len());
65+
let mut pvector = PVectorMut::with_capacity(num_new_values);
66+
67+
if num_new_values == 0 {
68+
return pvector.freeze();
69+
}
70+
71+
write_unpacked_to_pvector(array, selection_mask, &mut pvector);
72+
73+
pvector.freeze()
74+
}
75+
76+
/// Given a [`BitPackedArray`] and a template type `T: BitPacked`, unpacks all bitpacked values and
77+
/// writes them directly into an existing [`PVectorMut<T>`].
78+
///
79+
/// If a selection mask is passed in, the specified values will not be written.
80+
pub fn write_unpacked_to_pvector<T: BitPacked>(
81+
array: &BitPackedArray,
82+
selection_mask: Option<&Mask>,
83+
vector: &mut PVectorMut<T>,
84+
) {
85+
let num_new_values = true_count_of_optional_mask(&selection_mask, array.len());
86+
if num_new_values == 0 {
87+
return;
88+
}
89+
90+
let validity_mask = match selection_mask {
91+
Some(selection_mask) => array.validity_mask().filter(selection_mask),
92+
None => array.validity_mask(),
93+
};
94+
debug_assert_eq!(validity_mask.len(), num_new_values);
95+
96+
// SAFETY: We add the same amount of elements to both the buffer and the validity mask.
97+
let (buffer_mut, vector_validity_mut) = unsafe { vector.mut_parts() };
98+
99+
// We need to write the unpacked values to the buffer as well as update the validity mask.
100+
write_unpacked_to_buffer(array, selection_mask, buffer_mut);
101+
vector_validity_mut.append_mask(&validity_mask);
102+
103+
debug_assert_eq!(buffer_mut.len(), vector_validity_mut.len());
104+
}
105+
106+
/// Given a [`BitPackedArray`] and a template type `T: BitPacked`, unpacks all bitpacked values and
107+
/// writes them directly into an existing [`BufferMut<T>`].
108+
///
109+
/// If a selection mask is passed in, the specified values will not be written.
110+
///
111+
/// WARNING: This function will completely ignore the validity mask of the [`BitPackedArray`], so
112+
/// this should only be called from [`write_unpacked_to_pvector`].
113+
pub fn write_unpacked_to_buffer<T: BitPacked>(
114+
array: &BitPackedArray,
115+
selection_mask: Option<&Mask>,
116+
buffer: &mut BufferMut<T>,
117+
) {
118+
let num_new_values = true_count_of_optional_mask(&selection_mask, array.len());
119+
if num_new_values == 0 {
120+
return;
121+
}
122+
123+
let old_buffer_len = buffer.len();
124+
buffer.reserve(num_new_values);
125+
126+
// We will be unpacking values directly into the uninitialized region of the buffer.
127+
let buffer_uninit_slice = &mut buffer.spare_capacity_mut()[..num_new_values];
128+
129+
// If the selection mask is sparse, then we want to filter the bitpacked values while we are
130+
// unpacking all of the values.
131+
if let Some(selection) = selection_mask
132+
&& selection.density() < unpack_then_filter_threshold::<T>()
133+
{
134+
filter_while_unpacking_array(array, selection, buffer_uninit_slice);
135+
136+
// SAFETY: `filter_while_unpacking_array` writes exactly `num_new_values` values into the
137+
// buffer, so we know that all values up to the new length are initialized.
138+
unsafe { buffer.set_len(old_buffer_len + num_new_values) };
139+
140+
return;
141+
}
142+
143+
// Otherwise, if the selection mask is dense, then we might as well unpack all of the values and
144+
// then perform filtering.
145+
unpack_array(array, buffer_uninit_slice);
146+
147+
// SAFETY: `unpack_array` fully unpacks the bitpacked array and writes `array.len()` values into
148+
// the buffer, so we know that all values up to the new length are initialized.
149+
unsafe { buffer.set_len(old_buffer_len + array.len()) };
150+
151+
// Now that the array has been unpacked, apply the filter in-place.
152+
if let Some(selection) = selection_mask {
153+
buffer.filter(selection)
154+
}
155+
}
156+
157+
/// Unpacks the bitpacked values in the [`BitPackedArray`] directly into a mutable buffer.
158+
///
159+
/// On return, all values in the given buffer will have been initialized.
160+
///
161+
/// Note that the caller should probably ensure that there array isn't empty and that the true count
162+
/// of the selection mask isn't 0 for performance purposes.
163+
///
164+
/// WARNING: This function will completely ignore the validity mask of the [`BitPackedArray`], so
165+
/// this should only be called from [`write_unpacked_to_pvector`].
166+
fn unpack_array<T: BitPacked>(array: &BitPackedArray, buffer: &mut [MaybeUninit<T>]) {
167+
todo!()
168+
}
169+
170+
/// Unpacks the bitpacked array into the given buffer according to the given selection mask.
171+
///
172+
/// On return, all values in the given buffer will have been initialized.
173+
///
174+
/// Note that the caller should probably ensure that there array isn't empty and that the true count
175+
/// of the selection mask isn't 0 for performance purposes.
176+
///
177+
/// WARNING: This function will completely ignore the validity mask of the [`BitPackedArray`], so
178+
/// this should only be called from [`write_unpacked_to_pvector`].
179+
fn filter_while_unpacking_array<T: BitPacked>(
180+
array: &BitPackedArray,
181+
selection_mask: &Mask,
182+
buffer: &mut [MaybeUninit<T>],
183+
) {
184+
todo!()
185+
}

encodings/fastlanes/src/bitpacking/array/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use vortex_error::{VortexResult, vortex_bail, vortex_ensure};
1313

1414
pub mod bitpack_compress;
1515
pub mod bitpack_decompress;
16+
pub mod bitpack_decompress_vector; // Name TBBS.
1617
pub mod unpack_iter;
1718

1819
use crate::bitpack_compress::bitpack_encode;

encodings/fastlanes/src/bitpacking/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
mod array;
5-
pub use array::{BitPackedArray, bitpack_compress, bitpack_decompress, unpack_iter};
5+
pub use array::{
6+
BitPackedArray, bitpack_compress, bitpack_decompress, bitpack_decompress_vector, unpack_iter,
7+
};
68

79
mod vtable;
810
pub use vtable::{BitPackedEncoding, BitPackedVTable};

vortex-vector/src/primitive/generic_mut.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -78,13 +78,15 @@ impl<T> PVectorMut<T> {
7878
(self.elements, self.validity)
7979
}
8080

81-
/// Append n values to the vector.
82-
pub fn append_values(&mut self, value: T, n: usize)
83-
where
84-
T: Copy,
85-
{
86-
self.elements.push_n(value, n);
87-
self.validity.append_n(true, n);
81+
/// Decomposes a mutable reference to the primitive vector into mutable references to its
82+
/// constituent parts (buffer and validity).
83+
///
84+
/// # Safety
85+
///
86+
/// The caller must ensure the [`BufferMut`] has the same length as the [`MaskMut`] once the
87+
/// exclusive references are dropped.
88+
pub unsafe fn mut_parts(&mut self) -> (&mut BufferMut<T>, &mut MaskMut) {
89+
(&mut self.elements, &mut self.validity)
8890
}
8991
}
9092

vortex-vector/src/primitive/generic_mut_impl.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,15 @@ impl<T: NativePType> PVectorMut<T> {
6262
self.validity.append_n(false, 1);
6363
}
6464
}
65+
66+
/// Append `n` of the same `value` to the vector.
67+
pub fn append_values(&mut self, value: T, n: usize)
68+
where
69+
T: Copy,
70+
{
71+
self.elements.push_n(value, n);
72+
self.validity.append_n(true, n);
73+
}
6574
}
6675

6776
impl<T: NativePType> AsRef<[T]> for PVectorMut<T> {

0 commit comments

Comments
 (0)