@@ -11,23 +11,34 @@ mod tests {
1111 use cudarc:: driver:: DevicePtr ;
1212 use cudarc:: driver:: LaunchConfig ;
1313 use cudarc:: driver:: PushKernelArg ;
14+ use vortex_alp:: ALPFloat ;
15+ use vortex_alp:: Exponents ;
16+ use vortex_alp:: alp_encode;
17+ use vortex_array:: ToCanonical ;
1418 use vortex_array:: arrays:: PrimitiveArray ;
1519 use vortex_array:: buffer:: BufferHandle ;
1620 use vortex_array:: validity:: Validity :: NonNullable ;
1721 use vortex_buffer:: Buffer ;
22+ use vortex_dtype:: PType ;
1823 use vortex_error:: VortexExpect ;
1924 use vortex_error:: VortexResult ;
2025 use vortex_fastlanes:: BitPackedArray ;
26+ use vortex_fastlanes:: FoRArray ;
2127 use vortex_session:: VortexSession ;
2228
2329 use crate :: CudaBufferExt ;
2430 use crate :: CudaDeviceBuffer ;
2531 use crate :: CudaExecutionCtx ;
2632 use crate :: dynamic_dispatch_op:: DynamicOp ;
33+ use crate :: dynamic_dispatch_op:: DynamicOpCode_ALP ;
2734 use crate :: dynamic_dispatch_op:: DynamicOpCode_BITUNPACK ;
2835 use crate :: dynamic_dispatch_op:: DynamicOpCode_FOR ;
2936 use crate :: session:: CudaSession ;
3037
/// Packs the two ALP `f32` decode factors into one 64-bit op parameter.
///
/// The raw bit pattern of `e` fills the upper 32 bits and the raw bit pattern of `f`
/// fills the lower 32 bits.
fn pack_alp_f32_param(f: f32, e: f32) -> u64 {
    let upper = u64::from(e.to_bits());
    let lower = u64::from(f.to_bits());
    (upper << 32) | lower
}
41+
3142 fn make_bitpacked_array_u32 ( bit_width : u8 , len : usize ) -> BitPackedArray {
3243 let max_val = ( 1u64 << bit_width) . saturating_sub ( 1 ) ;
3344 let values: Vec < u32 > = ( 0 ..len)
@@ -90,6 +101,17 @@ mod tests {
90101 Ok ( host_output[ ..output_len] . to_vec ( ) )
91102 }
92103
104+ fn run_dynamic_dispatch_f32 (
105+ cuda_ctx : & CudaExecutionCtx ,
106+ input_ptr : u64 ,
107+ output_len : usize ,
108+ ops : & [ DynamicOp ] ,
109+ ) -> VortexResult < Vec < f32 > > {
110+ let result = run_dynamic_dispatch_u32 ( cuda_ctx, input_ptr, output_len, ops) ?;
111+ // SAFETY: f32 and u32 have identical size and alignment.
112+ Ok ( unsafe { std:: mem:: transmute :: < Vec < u32 > , Vec < f32 > > ( result) } )
113+ }
114+
93115 fn copy_to_device (
94116 cuda_ctx : & CudaExecutionCtx ,
95117 bitpacked : & BitPackedArray ,
@@ -132,17 +154,23 @@ mod tests {
132154
133155 #[ test]
134156 fn test_for ( ) -> VortexResult < ( ) > {
135- let reference: u32 = 42 ;
136157 let len = 5000 ;
137158
138- let input: Vec < u32 > = ( 0 ..len) . map ( |i| i as u32 ) . collect ( ) ;
139- let expected: Vec < u32 > = input. iter ( ) . map ( |v| v + reference) . collect ( ) ;
159+ // Create original u32 data with an offset so FOR has a meaningful reference.
160+ let original: Vec < u32 > = ( 0 ..len) . map ( |i| i as u32 + 42 ) . collect ( ) ;
161+ let primitive = PrimitiveArray :: new ( Buffer :: from ( original. clone ( ) ) , NonNullable ) ;
162+
163+ // FOR encode to get the reference and encoded (subtracted) values.
164+ let for_array = FoRArray :: encode ( primitive) ?;
165+ let reference = u32:: try_from ( for_array. reference_scalar ( ) ) ?;
140166
141167 let cuda_ctx = CudaSession :: create_execution_ctx ( & VortexSession :: empty ( ) ) ?;
142168
169+ // Copy the encoded values to device.
170+ let encoded_prim = for_array. encoded ( ) . to_primitive ( ) ;
143171 let device_input = cuda_ctx
144172 . stream ( )
145- . clone_htod ( input . as_slice ( ) )
173+ . clone_htod ( encoded_prim . as_slice :: < u32 > ( ) )
146174 . expect ( "copy input to device" ) ;
147175 let input_ptr = device_input. device_ptr ( cuda_ctx. stream ( ) ) . 0 ;
148176
@@ -151,13 +179,106 @@ mod tests {
151179 param : reference as u64 ,
152180 } ] ;
153181
182+ // Kernel should reconstruct the original data.
154183 let result = run_dynamic_dispatch_u32 ( & cuda_ctx, input_ptr, len, & ops) ?;
155- assert_eq ! ( result, expected) ;
184+ assert_eq ! ( result, original) ;
185+
186+ Ok ( ( ) )
187+ }
188+
/// Single-op pipeline: upload ALP-encoded integers, run the ALP op on the device, and
/// compare the result with the original `f32` data.
#[test]
fn test_alp() -> VortexResult<()> {
    let len = 2050;
    let exponents = Exponents { e: 2, f: 0 };

    // Build the floats by decoding consecutive integers; the assertion below checks that
    // encoding them produced no patches.
    let mut expected: Vec<f32> = Vec::with_capacity(len);
    for i in 0..len {
        expected.push(<f32 as ALPFloat>::decode_single(i as i32, exponents));
    }
    let source = PrimitiveArray::new(Buffer::from(expected.clone()), NonNullable);

    let alp = alp_encode(&source, Some(exponents))?;
    assert!(alp.patches().is_none());

    // Look up the decode factors for the exponents stored on the encoded array.
    let stored_exponents = alp.exponents();
    let f = <f32 as ALPFloat>::F10[stored_exponents.f as usize];
    let e = <f32 as ALPFloat>::IF10[stored_exponents.e as usize];

    let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;

    // Upload the encoded i32 values, viewed as u32 words.
    let encoded_words = alp.encoded().to_primitive().reinterpret_cast(PType::U32);
    let host_words = encoded_words.as_slice::<u32>();
    let device_input = cuda_ctx
        .stream()
        .clone_htod(host_words)
        .expect("copy input to device");
    let input_ptr = device_input.device_ptr(cuda_ctx.stream()).0;

    let alp_op = DynamicOp {
        op: DynamicOpCode_ALP,
        param: pack_alp_f32_param(f, e),
    };

    let decoded = run_dynamic_dispatch_f32(&cuda_ctx, input_ptr, len, &[alp_op])?;
    assert_eq!(decoded, expected);

    Ok(())
}
231+
/// Three-op pipeline: bit-unpack, apply the FoR reference, then ALP-decode, and compare
/// the result with the original `f32` data.
#[test]
fn test_alp_for_bitunpack() -> VortexResult<()> {
    let len = 2050;
    let bit_width: u8 = 6;
    let exponents = Exponents { e: 2, f: 0 };

    // Floats decoded from integers that cycle through 10..=73.
    let expected: Vec<f32> = (0..len)
        .map(|i| {
            let digits = 10 + (i as i32 % 64);
            <f32 as ALPFloat>::decode_single(digits, exponents)
        })
        .collect();
    let source = PrimitiveArray::new(Buffer::from(expected.clone()), NonNullable);

    // Stage 1: ALP turns the f32 values into i32 integers plus exponents.
    let alp = alp_encode(&source, Some(exponents))?;
    assert!(alp.patches().is_none());

    // Stage 2: FoR-encode the ALP integers and capture the reference value.
    let alp_ints = alp.encoded().to_primitive();
    let for_array = FoRArray::encode(alp_ints)?;
    let reference = i32::try_from(for_array.reference_scalar())? as u32;

    // Stage 3: bit-pack the FoR-encoded values.
    let bitpacked = BitPackedArray::encode(for_array.encoded(), bit_width)?;

    // Look up the decode factors for the exponents stored on the ALP array.
    let stored_exponents = alp.exponents();
    let alp_f = <f32 as ALPFloat>::F10[stored_exponents.f as usize];
    let alp_e = <f32 as ALPFloat>::IF10[stored_exponents.e as usize];

    let cuda_ctx = CudaSession::create_execution_ctx(&VortexSession::empty())?;
    let (input_ptr, _device_input) = copy_to_device(&cuda_ctx, &bitpacked)?;

    // Ops are listed in the reverse of the encoding order above.
    let ops = [
        DynamicOp {
            op: DynamicOpCode_BITUNPACK,
            param: u64::from(bit_width),
        },
        DynamicOp {
            op: DynamicOpCode_FOR,
            param: u64::from(reference),
        },
        DynamicOp {
            op: DynamicOpCode_ALP,
            param: pack_alp_f32_param(alp_f, alp_e),
        },
    ];

    let decoded = run_dynamic_dispatch_f32(&cuda_ctx, input_ptr, len, &ops)?;
    assert_eq!(decoded, expected);

    Ok(())
}
159281
160- /// 1 bitunpack + 7 FoR
161282 #[ test]
162283 fn test_max_ops_bitunpack_7for ( ) -> VortexResult < ( ) > {
163284 let bit_width: u8 = 6 ;
0 commit comments