@@ -46,7 +46,10 @@ use crate::{
4646 } ,
4747 general:: { GeneralMiniBlockCompressor , GeneralMiniBlockDecompressor } ,
4848 packed:: {
49- PackedStructFixedWidthMiniBlockDecompressor , PackedStructFixedWidthMiniBlockEncoder ,
49+ PackedStructFixedWidthMiniBlockDecompressor ,
50+ PackedStructFixedWidthMiniBlockEncoder , PackedStructVariablePerValueDecompressor ,
51+ PackedStructVariablePerValueEncoder , VariablePackedStructFieldDecoder ,
52+ VariablePackedStructFieldKind ,
5053 } ,
5154 rle:: { RleMiniBlockDecompressor , RleMiniBlockEncoder } ,
5255 value:: { ValueDecompressor , ValueEncoder } ,
@@ -64,7 +67,7 @@ use arrow_array::{cast::AsArray, types::UInt64Type};
6467use fsst:: fsst:: { FSST_LEAST_INPUT_MAX_LENGTH , FSST_LEAST_INPUT_SIZE } ;
6568use lance_core:: { datatypes:: Field , error:: LanceOptionExt , Error , Result } ;
6669use snafu:: location;
67- use std:: str:: FromStr ;
70+ use std:: { str:: FromStr , sync :: Arc } ;
6871
6972/// Default threshold for RLE compression selection.
7073/// RLE is chosen when the run count is less than this fraction of total values.
@@ -128,7 +131,7 @@ pub trait CompressionStrategy: Send + Sync + std::fmt::Debug {
128131 ) -> Result < Box < dyn MiniBlockCompressor > > ;
129132}
130133
131- #[ derive( Debug , Default ) ]
134+ #[ derive( Debug , Default , Clone ) ]
132135pub struct DefaultCompressionStrategy {
133136 /// User-configured compression parameters
134137 params : CompressionParams ,
@@ -297,6 +300,12 @@ impl DefaultCompressionStrategy {
297300 }
298301 }
299302
303+ /// Override the file version used to make compression decisions
304+ pub fn with_version ( mut self , version : LanceFileVersion ) -> Self {
305+ self . version = version;
306+ self
307+ }
308+
300309 /// Parse compression parameters from field metadata
301310 fn parse_field_metadata ( field : & Field ) -> CompressionFieldParams {
302311 let mut params = CompressionFieldParams :: default ( ) ;
@@ -431,12 +440,11 @@ impl CompressionStrategy for DefaultCompressionStrategy {
431440 DataBlock :: Struct ( struct_data_block) => {
432441 // this condition is actually checked at `PrimitiveStructuralEncoder::do_flush`,
433442 // just being cautious here.
434- if struct_data_block
435- . children
436- . iter ( )
437- . any ( |child| !matches ! ( child, DataBlock :: FixedWidth ( _) ) )
438- {
439- panic ! ( "packed struct encoding currently only supports fixed-width fields." )
443+ if struct_data_block. has_variable_width_child ( ) {
444+ return Err ( Error :: invalid_input (
445+ "Packed struct mini-block encoding supports only fixed-width children" ,
446+ location ! ( ) ,
447+ ) ) ;
440448 }
441449 Ok ( Box :: new ( PackedStructFixedWidthMiniBlockEncoder :: default ( ) ) )
442450 }
@@ -471,6 +479,32 @@ impl CompressionStrategy for DefaultCompressionStrategy {
471479 match data {
472480 DataBlock :: FixedWidth ( _) => Ok ( Box :: new ( ValueEncoder :: default ( ) ) ) ,
473481 DataBlock :: FixedSizeList ( _) => Ok ( Box :: new ( ValueEncoder :: default ( ) ) ) ,
482+ DataBlock :: Struct ( struct_block) => {
483+ if field. children . len ( ) != struct_block. children . len ( ) {
484+ return Err ( Error :: invalid_input (
485+ "Struct field metadata does not match data block children" ,
486+ location ! ( ) ,
487+ ) ) ;
488+ }
489+ let has_variable_child = struct_block. has_variable_width_child ( ) ;
490+ if has_variable_child {
491+ if self . version < LanceFileVersion :: V2_2 {
492+ return Err ( Error :: NotSupported {
493+ source : "Variable packed struct encoding requires Lance file version 2.2 or later" . into ( ) ,
494+ location : location ! ( ) ,
495+ } ) ;
496+ }
497+ Ok ( Box :: new ( PackedStructVariablePerValueEncoder :: new (
498+ self . clone ( ) ,
499+ field. children . clone ( ) ,
500+ ) ) )
501+ } else {
502+ Err ( Error :: invalid_input (
503+ "Packed struct per-value compression should not be used for fixed-width-only structs" ,
504+ location ! ( ) ,
505+ ) )
506+ }
507+ }
474508 DataBlock :: VariableWidth ( variable_width) => {
475509 let max_len = variable_width. expect_single_stat :: < UInt64Type > ( Stat :: MaxLength ) ;
476510 let data_size = variable_width. expect_single_stat :: < UInt64Type > ( Stat :: DataSize ) ;
@@ -784,6 +818,52 @@ impl DecompressionStrategy for DefaultDecompressionStrategy {
784818 general. compression . as_ref ( ) . expect_ok ( ) ?. scheme ( ) ,
785819 ) ?) )
786820 }
821+ Compression :: VariablePackedStruct ( description) => {
822+ let mut fields = Vec :: with_capacity ( description. fields . len ( ) ) ;
823+ for field in & description. fields {
824+ let value_encoding = field. value . as_ref ( ) . ok_or_else ( || {
825+ Error :: invalid_input (
826+ "VariablePackedStruct field is missing value encoding" ,
827+ location ! ( ) ,
828+ )
829+ } ) ?;
830+ let decoder = match field. layout . as_ref ( ) . ok_or_else ( || {
831+ Error :: invalid_input (
832+ "VariablePackedStruct field is missing layout details" ,
833+ location ! ( ) ,
834+ )
835+ } ) ? {
836+ crate :: format:: pb21:: variable_packed_struct:: field_encoding:: Layout :: BitsPerValue (
837+ bits_per_value,
838+ ) => {
839+ let decompressor =
840+ self . create_fixed_per_value_decompressor ( value_encoding) ?;
841+ VariablePackedStructFieldDecoder {
842+ kind : VariablePackedStructFieldKind :: Fixed {
843+ bits_per_value : * bits_per_value,
844+ decompressor : Arc :: from ( decompressor) ,
845+ } ,
846+ }
847+ }
848+ crate :: format:: pb21:: variable_packed_struct:: field_encoding:: Layout :: BitsPerLength (
849+ bits_per_length,
850+ ) => {
851+ let decompressor =
852+ self . create_variable_per_value_decompressor ( value_encoding) ?;
853+ VariablePackedStructFieldDecoder {
854+ kind : VariablePackedStructFieldKind :: Variable {
855+ bits_per_length : * bits_per_length,
856+ decompressor : Arc :: from ( decompressor) ,
857+ } ,
858+ }
859+ }
860+ } ;
861+ fields. push ( decoder) ;
862+ }
863+ Ok ( Box :: new ( PackedStructVariablePerValueDecompressor :: new (
864+ fields,
865+ ) ) )
866+ }
787867 _ => todo ! ( "variable-per-value decompressor for {:?}" , description) ,
788868 }
789869 }
0 commit comments