Skip to content

Commit 238d298

Browse files
committed
first commit set up scaffolding
Signed-off-by: Connor Tsui <connor.tsui20@gmail.com>
1 parent 86f3bf9 commit 238d298

File tree

5 files changed

+194
-8
lines changed

5 files changed

+194
-8
lines changed
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use vortex_buffer::BufferMut;
5+
use vortex_compute::filter::Filter;
6+
use vortex_dtype::match_each_integer_ptype;
7+
use vortex_mask::Mask;
8+
use vortex_vector::VectorMutOps;
9+
use vortex_vector::primitive::{PVector, PVectorMut, PrimitiveVector};
10+
11+
use crate::BitPackedArray;
12+
use crate::bitpacking::array::BitPacked;
13+
14+
/// Helper function to get the true count of a mask with a default if it doesn't exist.
15+
fn true_count_of_optional_mask(selection_mask: &Option<&Mask>, default: usize) -> usize {
16+
selection_mask
17+
.map(|mask| mask.true_count())
18+
.unwrap_or(default)
19+
}
20+
21+
/// Given a [`BitPackedArray`], unpacks all bitpacked values and creates a new [`PrimitiveVector`].
22+
///
23+
/// If a selection mask is passed in, the resultant vector will have the specified values filtered
24+
/// out.
25+
///
26+
/// Internally, this calls [`unpack_into_pvector`].
27+
pub fn unpack_into_vector(
28+
array: &BitPackedArray,
29+
selection_mask: Option<&Mask>,
30+
) -> PrimitiveVector {
31+
match_each_integer_ptype!(array.ptype(), |T| {
32+
unpack_into_pvector::<T>(array, selection_mask).into()
33+
})
34+
}
35+
36+
/// Given a [`BitPackedArray`] and a template type `T: BitPacked`, unpacks all bitpacked values and
37+
/// creates a new [`PVector<T>`].
38+
///
39+
/// If a selection mask is passed in, the resultant vector will have the specified values filtered
40+
/// out.
41+
///
42+
/// Internally, this calls [`write_unpacked_to_pvector`].
43+
pub fn unpack_into_pvector<T: BitPacked>(
44+
array: &BitPackedArray,
45+
selection_mask: Option<&Mask>,
46+
) -> PVector<T> {
47+
let num_new_values = true_count_of_optional_mask(&selection_mask, array.len());
48+
let mut pvector = PVectorMut::with_capacity(num_new_values);
49+
50+
if num_new_values == 0 {
51+
return pvector.freeze();
52+
}
53+
54+
write_unpacked_to_pvector(array, selection_mask, &mut pvector);
55+
56+
pvector.freeze()
57+
}
58+
59+
/// Given a [`BitPackedArray`] and a template type `T: BitPacked`, unpacks all bitpacked values and
60+
/// writes them directly into an existing [`PVectorMut<T>`].
61+
///
62+
/// If a selection mask is passed in, the specified values will not be written.
63+
///
64+
/// Internally, this calls [`write_unpacked_to_buffer`].
65+
pub fn write_unpacked_to_pvector<T: BitPacked>(
66+
array: &BitPackedArray,
67+
selection_mask: Option<&Mask>,
68+
vector: &mut PVectorMut<T>,
69+
) {
70+
let num_new_values = true_count_of_optional_mask(&selection_mask, array.len());
71+
if num_new_values == 0 {
72+
return;
73+
}
74+
75+
vector.reserve(num_new_values);
76+
77+
let validity_mask = match selection_mask {
78+
Some(selection_mask) => array.validity_mask().filter(selection_mask),
79+
None => array.validity_mask(),
80+
};
81+
debug_assert_eq!(validity_mask.len(), num_new_values);
82+
83+
// SAFETY: We add the same amount of elements to both the buffer and the validity mask.
84+
let (buffer_mut, vector_validity_mut) = unsafe { vector.mut_parts() };
85+
86+
// We need to write the unpacked values to the buffer as well as update the validity mask.
87+
88+
// SAFETY: We reserved enough space in the `reserve` call above.
89+
unsafe { write_unpacked_to_buffer(array, selection_mask, buffer_mut) };
90+
vector_validity_mut.append_mask(&validity_mask);
91+
92+
debug_assert_eq!(buffer_mut.len(), vector_validity_mut.len());
93+
}
94+
95+
/// Given a [`BitPackedArray`] and a template type `T: BitPacked`, unpacks all bitpacked values and
96+
/// writes them directly into an existing [`BufferMut<T>`].
97+
///
98+
/// If a selection mask is passed in, the specified values will not be written.
99+
///
100+
/// WARNING: this will completely ignore the validity mask of the [`BitPackedArray`]. This function
101+
/// should only be called from [`write_unpacked_to_pvector`].
102+
///
103+
/// # Safety
104+
///
105+
/// The caller must ensurre the buffer has enough capacity to write all of the unpacked values of
106+
/// the [`BitPackedArray`] into it.
107+
unsafe fn write_unpacked_to_buffer<T: BitPacked>(
108+
array: &BitPackedArray,
109+
selection_mask: Option<&Mask>,
110+
buffer: &mut BufferMut<T>,
111+
) {
112+
// TODO(connor): Where did these numbers come from? Add a public link after validating them.
113+
let full_decompression_threshold = match size_of::<T>() {
114+
1 => 0.03,
115+
2 => 0.03,
116+
4 => 0.075,
117+
_ => 0.09,
118+
// >8 bytes may have a higher threshold. These numbers are derived from a GCP c2-standard-4
119+
// with a "Cascade Lake" CPU.
120+
};
121+
122+
// If the selection mask is sparse, then we want to filter the bitpacked values while we unpack
123+
// all of the values.
124+
if let Some(selection) = selection_mask
125+
&& selection.density() < full_decompression_threshold
126+
{
127+
// SAFETY: The safety contract of `write_unpacked_to_buffer` ensures that we have enough
128+
// capacity to write into the buffer.
129+
unsafe { filter_while_unpacking_array(array, selection, buffer) };
130+
}
131+
132+
// Otherwise, if the selection mask is dense, then we might as well unpack all of the values and
133+
// then perform filtering.
134+
135+
// SAFETY: The safety contract of `write_unpacked_to_buffer` ensures that we have enough
136+
// capacity to write into the buffer.
137+
unsafe { unpack_array(array, buffer) };
138+
139+
if let Some(selection) = selection_mask {
140+
todo!("Filter the `BufferMut` with the mask in place")
141+
}
142+
}
143+
144+
/// Unpacks the bitpacked values in the [`BitPackedArray`] directly into a mutable buffer.
///
/// NOTE: this is currently an unimplemented stub; calling it panics via `todo!()`.
///
/// WARNING: this will completely ignore the validity mask of the [`BitPackedArray`]. This function
/// should only be called from [`write_unpacked_to_pvector`].
///
/// # Safety
///
/// The caller must ensure the buffer has enough capacity to write all of the unpacked values of
/// the [`BitPackedArray`] into it.
unsafe fn unpack_array<T: BitPacked>(array: &BitPackedArray, buffer: &mut BufferMut<T>) {
    todo!()
}
156+
157+
/// Unpacks the bitpacked array into the given buffer according to the given selection mask.
///
/// NOTE: this is currently an unimplemented stub; calling it panics via `todo!()`.
///
/// WARNING: this will completely ignore the validity mask of the [`BitPackedArray`]. This function
/// should only be called from [`write_unpacked_to_pvector`].
///
/// # Safety
///
/// The caller must ensure the buffer has enough capacity to write all of the unpacked values of
/// the [`BitPackedArray`] into it.
///
/// NOTE(review): presumably only the selected values are written here, so the true count of
/// `selection_mask` may be the real capacity requirement; confirm once this is implemented.
unsafe fn filter_while_unpacking_array<T: BitPacked>(
    array: &BitPackedArray,
    selection_mask: &Mask,
    buffer: &mut BufferMut<T>,
) {
    todo!()
}

encodings/fastlanes/src/bitpacking/array/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ use vortex_error::{VortexResult, vortex_bail, vortex_ensure};
1313

1414
pub mod bitpack_compress;
1515
pub mod bitpack_decompress;
16+
pub mod bitpack_decompress_vector; // Name TBBS.
1617
pub mod unpack_iter;
1718

1819
use crate::bitpack_compress::bitpack_encode;

encodings/fastlanes/src/bitpacking/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@
22
// SPDX-FileCopyrightText: Copyright the Vortex contributors
33

44
mod array;
5-
pub use array::{BitPackedArray, bitpack_compress, bitpack_decompress, unpack_iter};
5+
pub use array::{
6+
BitPackedArray, bitpack_compress, bitpack_decompress, bitpack_decompress_vector, unpack_iter,
7+
};
68

79
mod vtable;
810
pub use vtable::{BitPackedEncoding, BitPackedVTable};

vortex-vector/src/primitive/generic_mut.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -78,13 +78,15 @@ impl<T> PVectorMut<T> {
7878
(self.elements, self.validity)
7979
}
8080

81-
/// Append n values to the vector.
82-
pub fn append_values(&mut self, value: T, n: usize)
83-
where
84-
T: Copy,
85-
{
86-
self.elements.push_n(value, n);
87-
self.validity.append_n(true, n);
81+
/// Decomposes a mutable reference to the primitive vector into mutable references to its
82+
/// constituent parts (buffer and validity).
83+
///
84+
/// # Safety
85+
///
86+
/// The caller must ensure the [`BufferMut`] has the same length as the [`MaskMut`] once
87+
/// the exclusive references are dropped.
88+
pub unsafe fn mut_parts(&mut self) -> (&mut BufferMut<T>, &mut MaskMut) {
89+
(&mut self.elements, &mut self.validity)
8890
}
8991
}
9092

vortex-vector/src/primitive/generic_mut_impl.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,15 @@ impl<T: NativePType> PVectorMut<T> {
6262
self.validity.append_n(false, 1);
6363
}
6464
}
65+
66+
/// Appends `n` copies of `value` to the vector.
///
/// The elements buffer grows by `n` copies of `value`, and `n` `true` entries are appended to
/// the validity mask so that both stay the same length.
pub fn append_values(&mut self, value: T, n: usize)
where
    T: Copy,
{
    self.elements.push_n(value, n);
    self.validity.append_n(true, n);
}
6574
}
6675

6776
impl<T: NativePType> AsRef<[T]> for PVectorMut<T> {

0 commit comments

Comments
 (0)