@@ -11,23 +11,33 @@ mod tests {
1111 use cudarc:: driver:: DevicePtr ;
1212 use cudarc:: driver:: LaunchConfig ;
1313 use cudarc:: driver:: PushKernelArg ;
14+ use vortex_alp:: ALPFloat ;
15+ use vortex_alp:: Exponents ;
16+ use vortex_alp:: alp_encode;
17+ use vortex_array:: ToCanonical ;
1418 use vortex_array:: arrays:: PrimitiveArray ;
1519 use vortex_array:: buffer:: BufferHandle ;
1620 use vortex_array:: validity:: Validity :: NonNullable ;
1721 use vortex_buffer:: Buffer ;
1822 use vortex_error:: VortexExpect ;
1923 use vortex_error:: VortexResult ;
2024 use vortex_fastlanes:: BitPackedArray ;
25+ use vortex_fastlanes:: FoRArray ;
2126 use vortex_session:: VortexSession ;
2227
2328 use crate :: CudaBufferExt ;
2429 use crate :: CudaDeviceBuffer ;
2530 use crate :: CudaExecutionCtx ;
2631 use crate :: dynamic_dispatch_op:: DynamicOp ;
32+ use crate :: dynamic_dispatch_op:: DynamicOpCode_ALP ;
2733 use crate :: dynamic_dispatch_op:: DynamicOpCode_BITUNPACK ;
2834 use crate :: dynamic_dispatch_op:: DynamicOpCode_FOR ;
2935 use crate :: session:: CudaSession ;
3036
/// Packs two `f32` values into the single `u64` parameter of a `DynamicOp`.
///
/// The raw bit pattern of `f` occupies the low 32 bits of the result and the
/// raw bit pattern of `e` occupies the high 32 bits.
fn pack_alp_f32_param(f: f32, e: f32) -> u64 {
    let low_word = u64::from(f.to_bits());
    let high_word = u64::from(e.to_bits());
    (high_word << 32) | low_word
}
40+
3141 fn make_bitpacked_array_u32 ( bit_width : u8 , len : usize ) -> BitPackedArray {
3242 let max_val = ( 1u64 << bit_width) . saturating_sub ( 1 ) ;
3343 let values: Vec < u32 > = ( 0 ..len)
@@ -90,6 +100,17 @@ mod tests {
90100 Ok ( host_output[ ..output_len] . to_vec ( ) )
91101 }
92102
103+ fn run_dynamic_dispatch_f32 (
104+ cuda_ctx : & CudaExecutionCtx ,
105+ input_ptr : u64 ,
106+ output_len : usize ,
107+ ops : & [ DynamicOp ] ,
108+ ) -> VortexResult < Vec < f32 > > {
109+ let result = run_dynamic_dispatch_u32 ( cuda_ctx, input_ptr, output_len, ops) ?;
110+ // SAFETY: f32 and u32 have identical size and alignment.
111+ Ok ( unsafe { std:: mem:: transmute :: < Vec < u32 > , Vec < f32 > > ( result) } )
112+ }
113+
93114 fn copy_to_device (
94115 cuda_ctx : & CudaExecutionCtx ,
95116 bitpacked : & BitPackedArray ,
@@ -132,17 +153,20 @@ mod tests {
132153
133154 #[ test]
134155 fn test_for ( ) -> VortexResult < ( ) > {
135- let reference: u32 = 42 ;
136156 let len = 5000 ;
137157
138- let input: Vec < u32 > = ( 0 ..len) . map ( |i| i as u32 ) . collect ( ) ;
139- let expected: Vec < u32 > = input. iter ( ) . map ( |v| v + reference) . collect ( ) ;
158+ let original: Vec < u32 > = ( 0 ..len) . map ( |i| i as u32 + 42 ) . collect ( ) ;
159+ let primitive = PrimitiveArray :: new ( Buffer :: from ( original. clone ( ) ) , NonNullable ) ;
160+
161+ let for_array = FoRArray :: encode ( primitive) ?;
162+ let reference = u32:: try_from ( for_array. reference_scalar ( ) ) ?;
140163
141164 let cuda_ctx = CudaSession :: create_execution_ctx ( & VortexSession :: empty ( ) ) ?;
142165
166+ let encoded_prim = for_array. encoded ( ) . to_primitive ( ) ;
143167 let device_input = cuda_ctx
144168 . stream ( )
145- . clone_htod ( input . as_slice ( ) )
169+ . clone_htod ( encoded_prim . as_slice :: < u32 > ( ) )
146170 . expect ( "copy input to device" ) ;
147171 let input_ptr = device_input. device_ptr ( cuda_ctx. stream ( ) ) . 0 ;
148172
@@ -151,13 +175,100 @@ mod tests {
151175 param : reference as u64 ,
152176 } ] ;
153177
178+ // Kernel should reconstruct the original data.
154179 let result = run_dynamic_dispatch_u32 ( & cuda_ctx, input_ptr, len, & ops) ?;
155- assert_eq ! ( result, expected) ;
180+ assert_eq ! ( result, original) ;
181+
182+ Ok ( ( ) )
183+ }
184+
/// Round-trips `f32` data through ALP encoding and a single `ALP` dynamic op.
///
/// The encoded `i32` values are copied to the device, decoded by the kernel,
/// and the output is compared against the original floats.
#[test]
fn test_alp() -> VortexResult<()> {
    let len = 2050;

    // Generate the floats by decoding consecutive integers with fixed
    // exponents; the assertion below checks that encoding produced no patches.
    let exponents = Exponents { e: 2, f: 0 };
    let mut floats: Vec<f32> = Vec::with_capacity(len);
    for i in 0..len {
        floats.push(<f32 as ALPFloat>::decode_single(i as i32, exponents));
    }
    let float_prim = PrimitiveArray::new(Buffer::from(floats.clone()), NonNullable);

    let alp_array = alp_encode(&float_prim, Some(exponents))?;
    assert!(alp_array.patches().is_none());

    // Look up the two decode factors from the exponents stored on the encoded
    // array and pack them into the op parameter.
    let encoded_exponents = alp_array.exponents();
    let op_param = pack_alp_f32_param(
        <f32 as ALPFloat>::F10[encoded_exponents.f as usize],
        <f32 as ALPFloat>::IF10[encoded_exponents.e as usize],
    );
    let ops = [DynamicOp {
        op: DynamicOpCode_ALP,
        param: op_param,
    }];

    let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;

    // `device_input` stays bound until the end of the test so the device
    // allocation outlives the kernel launch that reads from `input_ptr`.
    let encoded_values = alp_array.encoded().to_primitive();
    let device_input = cuda_ctx
        .stream()
        .clone_htod(encoded_values.as_slice::<i32>())
        .expect("copy input to device");
    let input_ptr = device_input.device_ptr(cuda_ctx.stream()).0;

    let decoded = run_dynamic_dispatch_f32(&cuda_ctx, input_ptr, len, &ops)?;
    assert_eq!(decoded, floats);

    Ok(())
}
221+
/// Round-trips `f32` data through an ALP -> FoR -> bit-packing encode chain and
/// the matching three-op `BITUNPACK`, `FOR`, `ALP` kernel dispatch.
#[test]
fn test_alp_for_bitunpack() -> VortexResult<()> {
    let len = 2050;
    let bit_width: u8 = 6;

    // Source floats are decoded from integers in `10..74`.
    let exponents = Exponents { e: 2, f: 0 };
    let mut floats: Vec<f32> = Vec::with_capacity(len);
    for i in 0..len {
        let encoded_value = 10 + (i as i32) % 64;
        floats.push(<f32 as ALPFloat>::decode_single(encoded_value, exponents));
    }
    let float_prim = PrimitiveArray::new(Buffer::from(floats.clone()), NonNullable);

    // Stage 1: ALP encode f32 -> i32 encoded integers + exponents.
    let alp_array = alp_encode(&float_prim, Some(exponents))?;
    assert!(alp_array.patches().is_none());

    // Stage 2: FoR encode the ALP-encoded i32 integers.
    let for_array = FoRArray::encode(alp_array.encoded().to_primitive())?;
    let reference = i32::try_from(for_array.reference_scalar())? as u32;

    // Stage 3: bit-pack the FoR-encoded values.
    let bitpacked = BitPackedArray::encode(for_array.encoded(), bit_width)?;

    // ALP decode factors come from the exponents stored on the encoded array.
    let encoded_exponents = alp_array.exponents();
    let alp_param = pack_alp_f32_param(
        <f32 as ALPFloat>::F10[encoded_exponents.f as usize],
        <f32 as ALPFloat>::IF10[encoded_exponents.e as usize],
    );

    // Ops are listed in the reverse order of the encode stages above.
    let ops = [
        DynamicOp {
            op: DynamicOpCode_BITUNPACK,
            param: bit_width as u64,
        },
        DynamicOp {
            op: DynamicOpCode_FOR,
            param: reference as u64,
        },
        DynamicOp {
            op: DynamicOpCode_ALP,
            param: alp_param,
        },
    ];

    let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
    // `_device_input` is a named binding so the value returned alongside the
    // pointer is not dropped before the kernel has run.
    let (input_ptr, _device_input) = copy_to_device(&cuda_ctx, &bitpacked)?;

    let decoded = run_dynamic_dispatch_f32(&cuda_ctx, input_ptr, len, &ops)?;
    assert_eq!(decoded, floats);

    Ok(())
}
159271
160- /// 1 bitunpack + 7 FoR
161272 #[ test]
162273 fn test_max_ops_bitunpack_7for ( ) -> VortexResult < ( ) > {
163274 let bit_width: u8 = 6 ;
0 commit comments