Skip to content

Commit 8945cd8

Browse files
authored
Add decimal vectors (#5124)
1 parent 1cb3f21 commit 8945cd8

File tree

39 files changed

+1208
-183
lines changed

39 files changed

+1208
-183
lines changed

Cargo.lock

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-array/src/stats/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use log::debug;
1313
use num_enum::{IntoPrimitive, TryFromPrimitive};
1414
pub use stats_set::*;
1515
use vortex_dtype::Nullability::{NonNullable, Nullable};
16-
use vortex_dtype::{DECIMAL256_MAX_PRECISION, DType, DecimalDType, PType};
16+
use vortex_dtype::{DType, DecimalDType, NativeDecimalType, PType, i256};
1717

1818
mod array;
1919
mod bound;
@@ -210,7 +210,7 @@ impl Stat {
210210
// - https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Sum.scala#L66
211211
// - https://github.com/apache/datafusion/blob/4153adf2c0f6e317ef476febfdc834208bd46622/datafusion/functions-aggregate/src/sum.rs#L188
212212
let precision =
213-
u8::min(DECIMAL256_MAX_PRECISION, decimal_dtype.precision() + 10);
213+
u8::min(i256::MAX_PRECISION, decimal_dtype.precision() + 10);
214214
DType::Decimal(
215215
DecimalDType::new(precision, decimal_dtype.scale()),
216216
Nullable,

vortex-compute/Cargo.toml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,20 @@ workspace = true
2222
[dependencies]
2323
vortex-buffer = { workspace = true }
2424
vortex-dtype = { workspace = true }
25+
vortex-error = { workspace = true }
2526
vortex-mask = { workspace = true }
2627
vortex-vector = { workspace = true }
2728

29+
arrow-array = { workspace = true, optional = true }
30+
arrow-buffer = { workspace = true, optional = true }
31+
arrow-schema = { workspace = true, optional = true }
2832
num-traits = { workspace = true }
2933

3034
[features]
31-
default = ["arithmetic", "comparison", "filter", "logical", "mask"]
35+
default = ["arithmetic", "arrow", "comparison", "filter", "logical", "mask"]
3236

3337
arithmetic = []
38+
arrow = ["dep:arrow-array", "dep:arrow-buffer", "dep:arrow-schema"]
3439
comparison = []
3540
filter = []
3641
logical = []

vortex-compute/src/arrow/bool.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use std::sync::Arc;
5+
6+
use arrow_array::{ArrayRef, BooleanArray};
7+
use vortex_error::VortexResult;
8+
use vortex_vector::BoolVector;
9+
10+
use crate::arrow::IntoArrow;
11+
12+
impl IntoArrow<ArrayRef> for BoolVector {
13+
fn into_arrow(self) -> VortexResult<ArrayRef> {
14+
let (bits, validity) = self.into_parts();
15+
Ok(Arc::new(BooleanArray::new(
16+
bits.into(),
17+
validity.into_arrow()?,
18+
)))
19+
}
20+
}
vortex-compute/src/arrow/decimal.rs

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use std::sync::Arc;
5+
6+
use arrow_array::types::{Decimal32Type, Decimal64Type, Decimal128Type, Decimal256Type};
7+
use arrow_array::{ArrayRef, PrimitiveArray};
8+
use vortex_buffer::Buffer;
9+
use vortex_dtype::i256;
10+
use vortex_error::VortexResult;
11+
use vortex_vector::{DVector, DecimalVector};
12+
13+
use crate::arrow::IntoArrow;
14+
15+
impl IntoArrow<ArrayRef> for DecimalVector {
16+
fn into_arrow(self) -> VortexResult<ArrayRef> {
17+
match self {
18+
DecimalVector::D8(v) => v.into_arrow(),
19+
DecimalVector::D16(v) => v.into_arrow(),
20+
DecimalVector::D32(v) => v.into_arrow(),
21+
DecimalVector::D64(v) => v.into_arrow(),
22+
DecimalVector::D128(v) => v.into_arrow(),
23+
DecimalVector::D256(v) => v.into_arrow(),
24+
}
25+
}
26+
}
27+
28+
macro_rules! impl_decimal_upcast_i32 {
29+
($T:ty) => {
30+
impl IntoArrow<ArrayRef> for DVector<$T> {
31+
fn into_arrow(self) -> VortexResult<ArrayRef> {
32+
let (_, elements, validity) = self.into_parts();
33+
// Upcast the DVector to Arrow's smallest decimal type (Decimal32)
34+
let elements =
35+
Buffer::<i32>::from_trusted_len_iter(elements.iter().map(|i| *i as i32));
36+
Ok(Arc::new(PrimitiveArray::<Decimal32Type>::new(
37+
elements.into_arrow_scalar_buffer(),
38+
validity.into_arrow()?,
39+
)))
40+
}
41+
}
42+
};
43+
}
44+
45+
impl_decimal_upcast_i32!(i8);
46+
impl_decimal_upcast_i32!(i16);
47+
48+
/// Direct Arrow conversion for vectors that map directly to Arrow decimal types.
49+
macro_rules! impl_decimal {
50+
($T:ty, $A:ty) => {
51+
impl IntoArrow<ArrayRef> for DVector<$T> {
52+
fn into_arrow(self) -> VortexResult<ArrayRef> {
53+
let (_, elements, validity) = self.into_parts();
54+
Ok(Arc::new(PrimitiveArray::<$A>::new(
55+
elements.into_arrow_scalar_buffer(),
56+
validity.into_arrow()?,
57+
)))
58+
}
59+
}
60+
};
61+
}
62+
63+
impl_decimal!(i32, Decimal32Type);
64+
impl_decimal!(i64, Decimal64Type);
65+
impl_decimal!(i128, Decimal128Type);
66+
67+
impl IntoArrow<ArrayRef> for DVector<i256> {
68+
fn into_arrow(self) -> VortexResult<ArrayRef> {
69+
let (_, elements, validity) = self.into_parts();
70+
71+
// Transmute the elements from our i256 to Arrow's.
72+
// SAFETY: we use Arrow's type internally for our layout.
73+
let elements =
74+
unsafe { std::mem::transmute::<Buffer<i256>, Buffer<arrow_buffer::i256>>(elements) };
75+
76+
Ok(Arc::new(PrimitiveArray::<Decimal256Type>::new(
77+
elements.into_arrow_scalar_buffer(),
78+
validity.into_arrow()?,
79+
)))
80+
}
81+
}

vortex-compute/src/arrow/mask.rs

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use arrow_buffer::NullBuffer;
5+
use vortex_error::VortexResult;
6+
use vortex_mask::Mask;
7+
8+
use crate::arrow::IntoArrow;
9+
10+
impl IntoArrow<Option<NullBuffer>> for Mask {
11+
fn into_arrow(self) -> VortexResult<Option<NullBuffer>> {
12+
Ok(match self {
13+
Mask::AllTrue(_) => None,
14+
Mask::AllFalse(len) => Some(NullBuffer::new_null(len)),
15+
Mask::Values(values) => {
16+
// SAFETY: we maintain our own validated true count.
17+
Some(unsafe {
18+
NullBuffer::new_unchecked(
19+
values.bit_buffer().clone().into(),
20+
values.len() - values.true_count(),
21+
)
22+
})
23+
}
24+
})
25+
}
26+
}

vortex-compute/src/arrow/mod.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
//! Conversion logic from Vortex vector types to Arrow types.
5+
6+
use vortex_error::VortexResult;
7+
8+
mod bool;
9+
mod decimal;
10+
mod mask;
11+
mod null;
12+
mod primitive;
13+
mod struct_;
14+
mod varbin;
15+
mod vector;
16+
17+
/// Trait for converting Vortex vector types into Arrow types.
///
/// `Output` is the Arrow-side type produced by the conversion; a single Vortex type
/// may implement this trait for whichever output type suits it.
18+
pub trait IntoArrow<Output> {
19+
/// Convert the Vortex type into an Arrow type.
///
/// Takes `self` by value and returns the converted value wrapped in a
/// [`VortexResult`], so an implementation can report a failed conversion as an error.
20+
fn into_arrow(self) -> VortexResult<Output>;
21+
}

vortex-compute/src/arrow/null.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use std::sync::Arc;
5+
6+
use arrow_array::{ArrayRef, NullArray};
7+
use vortex_error::VortexResult;
8+
use vortex_vector::{NullVector, VectorOps};
9+
10+
use crate::arrow::IntoArrow;
11+
12+
impl IntoArrow<ArrayRef> for NullVector {
13+
fn into_arrow(self) -> VortexResult<ArrayRef> {
14+
Ok(Arc::new(NullArray::new(self.len())))
15+
}
16+
}
vortex-compute/src/arrow/primitive.rs

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use std::sync::Arc;
5+
6+
use arrow_array::types::{
7+
Float16Type, Float32Type, Float64Type, Int8Type, Int16Type, Int32Type, Int64Type, UInt8Type,
8+
UInt16Type, UInt32Type, UInt64Type,
9+
};
10+
use arrow_array::{ArrayRef, PrimitiveArray};
11+
use vortex_dtype::half::f16;
12+
use vortex_error::VortexResult;
13+
use vortex_vector::{PVector, PrimitiveVector, match_each_pvector};
14+
15+
use crate::arrow::IntoArrow;
16+
17+
impl IntoArrow<ArrayRef> for PrimitiveVector {
18+
fn into_arrow(self) -> VortexResult<ArrayRef> {
19+
match_each_pvector!(self, |v| { v.into_arrow() })
20+
}
21+
}
22+
23+
macro_rules! impl_primitive {
24+
($T:ty, $A:ty) => {
25+
impl IntoArrow<ArrayRef> for PVector<$T> {
26+
fn into_arrow(self) -> VortexResult<ArrayRef> {
27+
let (elements, validity) = self.into_parts();
28+
Ok(Arc::new(PrimitiveArray::<$A>::new(
29+
elements.into_arrow_scalar_buffer(),
30+
validity.into_arrow()?,
31+
)))
32+
}
33+
}
34+
};
35+
}
36+
37+
impl_primitive!(u8, UInt8Type);
38+
impl_primitive!(u16, UInt16Type);
39+
impl_primitive!(u32, UInt32Type);
40+
impl_primitive!(u64, UInt64Type);
41+
impl_primitive!(i8, Int8Type);
42+
impl_primitive!(i16, Int16Type);
43+
impl_primitive!(i32, Int32Type);
44+
impl_primitive!(i64, Int64Type);
45+
impl_primitive!(f16, Float16Type);
46+
impl_primitive!(f32, Float32Type);
47+
impl_primitive!(f64, Float64Type);
vortex-compute/src/arrow/struct_.rs

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3+
4+
use std::sync::Arc;
5+
6+
use arrow_array::{ArrayRef, StructArray};
7+
use arrow_schema::{Field, Fields};
8+
use vortex_error::VortexResult;
9+
use vortex_vector::StructVector;
10+
11+
use crate::arrow::IntoArrow;
12+
13+
impl IntoArrow<ArrayRef> for StructVector {
14+
fn into_arrow(self) -> VortexResult<ArrayRef> {
15+
let (fields, validity) = self.into_parts();
16+
let arrow_fields = fields
17+
.iter()
18+
.map(|field| field.clone().into_arrow())
19+
.collect::<VortexResult<Vec<ArrayRef>>>()?;
20+
21+
// We need to make up the field names since vectors are unnamed.
22+
let fields = Fields::from(
23+
(0..arrow_fields.len())
24+
.map(|i| Field::new(i.to_string(), arrow_fields[i].data_type().clone(), true))
25+
.collect::<Vec<Field>>(),
26+
);
27+
28+
Ok(Arc::new(StructArray::new(
29+
fields,
30+
arrow_fields,
31+
validity.into_arrow()?,
32+
)))
33+
}
34+
}

0 commit comments

Comments
 (0)