@@ -28,111 +28,92 @@ pub(crate) fn dictionary_cast<K: ArrowDictionaryKeyType>(
2828) -> Result < ArrayRef , ArrowError > {
2929 use DataType :: * ;
3030
31- match to_type {
32- Dictionary ( to_index_type, to_value_type) => {
33- let dict_array = array
34- . as_any ( )
35- . downcast_ref :: < DictionaryArray < K > > ( )
36- . ok_or_else ( || {
37- ArrowError :: ComputeError (
38- "Internal Error: Cannot cast dictionary to DictionaryArray of expected type" . to_string ( ) ,
39- )
40- } ) ?;
31+ let array = array. as_dictionary :: < K > ( ) ;
32+ let from_child_type = array. values ( ) . data_type ( ) ;
33+ match ( from_child_type, to_type) {
34+ ( _, Dictionary ( to_index_type, to_value_type) ) => {
35+ dictionary_to_dictionary_cast ( array, to_index_type, to_value_type, cast_options)
36+ }
37+ // `unpack_dictionary` can handle Utf8View/BinaryView types, but incurs unnecessary data
38+ // copy of the value buffer. Fast path which avoids copying underlying values buffer.
39+ // TODO: handle LargeUtf8/LargeBinary -> View (need to check offsets can fit)
40+ // TODO: handle cross types (String -> BinaryView, Binary -> StringView)
41+ // (need to validate utf8?)
42+ ( Utf8 , Utf8View ) => view_from_dict_values :: < K , Utf8Type , StringViewType > (
43+ array. keys ( ) ,
44+ array. values ( ) . as_string :: < i32 > ( ) ,
45+ ) ,
46+ ( Binary , BinaryView ) => view_from_dict_values :: < K , BinaryType , BinaryViewType > (
47+ array. keys ( ) ,
48+ array. values ( ) . as_binary :: < i32 > ( ) ,
49+ ) ,
50+ _ => unpack_dictionary ( array, to_type, cast_options) ,
51+ }
52+ }
4153
42- let keys_array: ArrayRef =
43- Arc :: new ( PrimitiveArray :: < K > :: from ( dict_array. keys ( ) . to_data ( ) ) ) ;
44- let values_array = dict_array. values ( ) ;
45- let cast_keys = cast_with_options ( & keys_array, to_index_type, cast_options) ?;
46- let cast_values = cast_with_options ( values_array, to_value_type, cast_options) ?;
54+ fn dictionary_to_dictionary_cast < K : ArrowDictionaryKeyType > (
55+ array : & DictionaryArray < K > ,
56+ to_index_type : & DataType ,
57+ to_value_type : & DataType ,
58+ cast_options : & CastOptions ,
59+ ) -> Result < ArrayRef , ArrowError > {
60+ use DataType :: * ;
4761
48- // Failure to cast keys (because they don't fit in the
49- // target type) results in NULL values;
50- if cast_keys. null_count ( ) > keys_array. null_count ( ) {
51- return Err ( ArrowError :: ComputeError ( format ! (
52- "Could not convert {} dictionary indexes from {:?} to {:?}" ,
53- cast_keys. null_count( ) - keys_array. null_count( ) ,
54- keys_array. data_type( ) ,
55- to_index_type
56- ) ) ) ;
57- }
62+ let keys_array: ArrayRef = Arc :: new ( PrimitiveArray :: < K > :: from ( array. keys ( ) . to_data ( ) ) ) ;
63+ let values_array = array. values ( ) ;
64+ let cast_keys = cast_with_options ( & keys_array, to_index_type, cast_options) ?;
65+ let cast_values = cast_with_options ( values_array, to_value_type, cast_options) ?;
5866
59- let data = cast_keys. into_data ( ) ;
60- let builder = data
61- . into_builder ( )
62- . data_type ( to_type. clone ( ) )
63- . child_data ( vec ! [ cast_values. into_data( ) ] ) ;
67+ // Failure to cast keys (because they don't fit in the
68+ // target type) results in NULL values;
69+ if cast_keys. null_count ( ) > keys_array. null_count ( ) {
70+ return Err ( ArrowError :: ComputeError ( format ! (
71+ "Could not convert {} dictionary indexes from {:?} to {:?}" ,
72+ cast_keys. null_count( ) - keys_array. null_count( ) ,
73+ keys_array. data_type( ) ,
74+ to_index_type
75+ ) ) ) ;
76+ }
6477
65- // Safety
66- // Cast keys are still valid
67- let data = unsafe { builder. build_unchecked ( ) } ;
78+ let data = cast_keys. into_data ( ) ;
79+ let builder = data
80+ . into_builder ( )
81+ . data_type ( Dictionary (
82+ Box :: new ( to_index_type. clone ( ) ) ,
83+ Box :: new ( to_value_type. clone ( ) ) ,
84+ ) )
85+ . child_data ( vec ! [ cast_values. into_data( ) ] ) ;
6886
69- // create the appropriate array type
70- let new_array: ArrayRef = match * * to_index_type {
71- Int8 => Arc :: new ( DictionaryArray :: < Int8Type > :: from ( data) ) ,
72- Int16 => Arc :: new ( DictionaryArray :: < Int16Type > :: from ( data) ) ,
73- Int32 => Arc :: new ( DictionaryArray :: < Int32Type > :: from ( data) ) ,
74- Int64 => Arc :: new ( DictionaryArray :: < Int64Type > :: from ( data) ) ,
75- UInt8 => Arc :: new ( DictionaryArray :: < UInt8Type > :: from ( data) ) ,
76- UInt16 => Arc :: new ( DictionaryArray :: < UInt16Type > :: from ( data) ) ,
77- UInt32 => Arc :: new ( DictionaryArray :: < UInt32Type > :: from ( data) ) ,
78- UInt64 => Arc :: new ( DictionaryArray :: < UInt64Type > :: from ( data) ) ,
79- _ => {
80- return Err ( ArrowError :: CastError ( format ! (
81- "Unsupported type {to_index_type} for dictionary index"
82- ) ) ) ;
83- }
84- } ;
87+ // Safety
88+ // Cast keys are still valid
89+ let data = unsafe { builder. build_unchecked ( ) } ;
8590
86- Ok ( new_array)
91+ // create the appropriate array type
92+ let new_array: ArrayRef = match to_index_type {
93+ Int8 => Arc :: new ( DictionaryArray :: < Int8Type > :: from ( data) ) ,
94+ Int16 => Arc :: new ( DictionaryArray :: < Int16Type > :: from ( data) ) ,
95+ Int32 => Arc :: new ( DictionaryArray :: < Int32Type > :: from ( data) ) ,
96+ Int64 => Arc :: new ( DictionaryArray :: < Int64Type > :: from ( data) ) ,
97+ UInt8 => Arc :: new ( DictionaryArray :: < UInt8Type > :: from ( data) ) ,
98+ UInt16 => Arc :: new ( DictionaryArray :: < UInt16Type > :: from ( data) ) ,
99+ UInt32 => Arc :: new ( DictionaryArray :: < UInt32Type > :: from ( data) ) ,
100+ UInt64 => Arc :: new ( DictionaryArray :: < UInt64Type > :: from ( data) ) ,
101+ _ => {
102+ return Err ( ArrowError :: CastError ( format ! (
103+ "Unsupported type {to_index_type} for dictionary index"
104+ ) ) ) ;
87105 }
88- Utf8View => {
89- // `unpack_dictionary` can handle Utf8View/BinaryView types, but incurs unnecessary data copy of the value buffer.
90- // we handle it here to avoid the copy.
91- let dict_array = array
92- . as_dictionary :: < K > ( )
93- . downcast_dict :: < StringArray > ( )
94- . ok_or_else ( || {
95- ArrowError :: ComputeError (
96- "Internal Error: Cannot cast Utf8View to StringArray of expected type"
97- . to_string ( ) ,
98- )
99- } ) ?;
106+ } ;
100107
101- let string_view = view_from_dict_values :: < K , StringViewType , GenericStringType < i32 > > (
102- dict_array. values ( ) ,
103- dict_array. keys ( ) ,
104- ) ?;
105- Ok ( Arc :: new ( string_view) )
106- }
107- BinaryView => {
108- // `unpack_dictionary` can handle Utf8View/BinaryView types, but incurs unnecessary data copy of the value buffer.
109- // we handle it here to avoid the copy.
110- let dict_array = array
111- . as_dictionary :: < K > ( )
112- . downcast_dict :: < BinaryArray > ( )
113- . ok_or_else ( || {
114- ArrowError :: ComputeError (
115- "Internal Error: Cannot cast BinaryView to BinaryArray of expected type"
116- . to_string ( ) ,
117- )
118- } ) ?;
119-
120- let binary_view = view_from_dict_values :: < K , BinaryViewType , BinaryType > (
121- dict_array. values ( ) ,
122- dict_array. keys ( ) ,
123- ) ?;
124- Ok ( Arc :: new ( binary_view) )
125- }
126- _ => unpack_dictionary :: < K > ( array, to_type, cast_options) ,
127- }
108+ Ok ( new_array)
128109}
129110
130- fn view_from_dict_values < K : ArrowDictionaryKeyType , T : ByteViewType , V : ByteArrayType > (
131- array : & GenericByteArray < V > ,
111+ fn view_from_dict_values < K : ArrowDictionaryKeyType , V : ByteArrayType , T : ByteViewType > (
132112 keys : & PrimitiveArray < K > ,
133- ) -> Result < GenericByteViewArray < T > , ArrowError > {
134- let value_buffer = array. values ( ) ;
135- let value_offsets = array. value_offsets ( ) ;
113+ values : & GenericByteArray < V > ,
114+ ) -> Result < ArrayRef , ArrowError > {
115+ let value_buffer = values. values ( ) ;
116+ let value_offsets = values. value_offsets ( ) ;
136117 let mut builder = GenericByteViewBuilder :: < T > :: with_capacity ( keys. len ( ) ) ;
137118 builder. append_block ( value_buffer. clone ( ) ) ;
138119 for i in keys. iter ( ) {
@@ -157,21 +138,17 @@ fn view_from_dict_values<K: ArrowDictionaryKeyType, T: ByteViewType, V: ByteArra
157138 }
158139 }
159140 }
160- Ok ( builder. finish ( ) )
141+ Ok ( Arc :: new ( builder. finish ( ) ) )
161142}
162143
163- // Unpack a dictionary where the keys are of type <K> into a flattened array of type to_type
164- pub ( crate ) fn unpack_dictionary < K > (
165- array : & dyn Array ,
144+ // Unpack a dictionary into a flattened array of type to_type
145+ pub ( crate ) fn unpack_dictionary < K : ArrowDictionaryKeyType > (
146+ array : & DictionaryArray < K > ,
166147 to_type : & DataType ,
167148 cast_options : & CastOptions ,
168- ) -> Result < ArrayRef , ArrowError >
169- where
170- K : ArrowDictionaryKeyType ,
171- {
172- let dict_array = array. as_dictionary :: < K > ( ) ;
173- let cast_dict_values = cast_with_options ( dict_array. values ( ) , to_type, cast_options) ?;
174- take ( cast_dict_values. as_ref ( ) , dict_array. keys ( ) , None )
149+ ) -> Result < ArrayRef , ArrowError > {
150+ let cast_dict_values = cast_with_options ( array. values ( ) , to_type, cast_options) ?;
151+ take ( cast_dict_values. as_ref ( ) , array. keys ( ) , None )
175152}
176153
177154/// Pack a data type into a dictionary array passing the values through a primitive array
0 commit comments