@@ -66,110 +66,72 @@ pub fn create_random_batch(
6666pub fn create_random_array (
6767 field : & Field ,
6868 size : usize ,
69- null_density : f32 ,
69+ mut null_density : f32 ,
7070 true_density : f32 ,
7171) -> Result < ArrayRef > {
72- // Override null density with 0.0 if the array is non-nullable
73- // and a primitive type in case a nested field is nullable
74- let primitive_null_density = match field. is_nullable ( ) {
75- true => null_density,
76- false => 0.0 ,
77- } ;
72+ // Override the null density only when the type is neither nested nor a dictionary.
73+ // For nested types we don't want to override it, as we want to keep the nullability of the children.
74+ // Dictionary arrays handle nullability internally.
75+ if !field. data_type ( ) . is_nested ( ) && !matches ! ( field. data_type( ) , Dictionary ( _, _) ) {
76+ // Override null density with 0.0 if the array is non-nullable
77+ null_density = match field. is_nullable ( ) {
78+ true => null_density,
79+ false => 0.0 ,
80+ } ;
81+ }
82+
7883 use DataType :: * ;
79- Ok ( match field. data_type ( ) {
84+ let array = match field. data_type ( ) {
8085 Null => Arc :: new ( NullArray :: new ( size) ) as ArrayRef ,
81- Boolean => Arc :: new ( create_boolean_array (
82- size,
83- primitive_null_density,
84- true_density,
85- ) ) ,
86- Int8 => Arc :: new ( create_primitive_array :: < Int8Type > (
87- size,
88- primitive_null_density,
89- ) ) ,
90- Int16 => Arc :: new ( create_primitive_array :: < Int16Type > (
91- size,
92- primitive_null_density,
93- ) ) ,
94- Int32 => Arc :: new ( create_primitive_array :: < Int32Type > (
95- size,
96- primitive_null_density,
97- ) ) ,
98- Int64 => Arc :: new ( create_primitive_array :: < Int64Type > (
99- size,
100- primitive_null_density,
101- ) ) ,
102- UInt8 => Arc :: new ( create_primitive_array :: < UInt8Type > (
103- size,
104- primitive_null_density,
105- ) ) ,
106- UInt16 => Arc :: new ( create_primitive_array :: < UInt16Type > (
107- size,
108- primitive_null_density,
109- ) ) ,
110- UInt32 => Arc :: new ( create_primitive_array :: < UInt32Type > (
111- size,
112- primitive_null_density,
113- ) ) ,
114- UInt64 => Arc :: new ( create_primitive_array :: < UInt64Type > (
115- size,
116- primitive_null_density,
117- ) ) ,
86+ Boolean => Arc :: new ( create_boolean_array ( size, null_density, true_density) ) ,
87+ Int8 => Arc :: new ( create_primitive_array :: < Int8Type > ( size, null_density) ) ,
88+ Int16 => Arc :: new ( create_primitive_array :: < Int16Type > ( size, null_density) ) ,
89+ Int32 => Arc :: new ( create_primitive_array :: < Int32Type > ( size, null_density) ) ,
90+ Int64 => Arc :: new ( create_primitive_array :: < Int64Type > ( size, null_density) ) ,
91+ UInt8 => Arc :: new ( create_primitive_array :: < UInt8Type > ( size, null_density) ) ,
92+ UInt16 => Arc :: new ( create_primitive_array :: < UInt16Type > ( size, null_density) ) ,
93+ UInt32 => Arc :: new ( create_primitive_array :: < UInt32Type > ( size, null_density) ) ,
94+ UInt64 => Arc :: new ( create_primitive_array :: < UInt64Type > ( size, null_density) ) ,
11895 Float16 => {
11996 return Err ( ArrowError :: NotYetImplemented (
12097 "Float16 is not implemented" . to_string ( ) ,
12198 ) ) ;
12299 }
123- Float32 => Arc :: new ( create_primitive_array :: < Float32Type > (
124- size,
125- primitive_null_density,
126- ) ) ,
127- Float64 => Arc :: new ( create_primitive_array :: < Float64Type > (
128- size,
129- primitive_null_density,
130- ) ) ,
100+ Float32 => Arc :: new ( create_primitive_array :: < Float32Type > ( size, null_density) ) ,
101+ Float64 => Arc :: new ( create_primitive_array :: < Float64Type > ( size, null_density) ) ,
131102 Timestamp ( unit, tz) => match unit {
132103 TimeUnit :: Second => Arc :: new (
133- create_random_temporal_array :: < TimestampSecondType > ( size, primitive_null_density )
104+ create_random_temporal_array :: < TimestampSecondType > ( size, null_density )
134105 . with_timezone_opt ( tz. clone ( ) ) ,
135- ) ,
106+ ) as ArrayRef ,
136107 TimeUnit :: Millisecond => Arc :: new (
137- create_random_temporal_array :: < TimestampMillisecondType > (
138- size,
139- primitive_null_density,
140- )
141- . with_timezone_opt ( tz. clone ( ) ) ,
108+ create_random_temporal_array :: < TimestampMillisecondType > ( size, null_density)
109+ . with_timezone_opt ( tz. clone ( ) ) ,
142110 ) ,
143111 TimeUnit :: Microsecond => Arc :: new (
144- create_random_temporal_array :: < TimestampMicrosecondType > (
145- size,
146- primitive_null_density,
147- )
148- . with_timezone_opt ( tz. clone ( ) ) ,
112+ create_random_temporal_array :: < TimestampMicrosecondType > ( size, null_density)
113+ . with_timezone_opt ( tz. clone ( ) ) ,
149114 ) ,
150115 TimeUnit :: Nanosecond => Arc :: new (
151- create_random_temporal_array :: < TimestampNanosecondType > (
152- size,
153- primitive_null_density,
154- )
155- . with_timezone_opt ( tz. clone ( ) ) ,
116+ create_random_temporal_array :: < TimestampNanosecondType > ( size, null_density)
117+ . with_timezone_opt ( tz. clone ( ) ) ,
156118 ) ,
157119 } ,
158120 Date32 => Arc :: new ( create_random_temporal_array :: < Date32Type > (
159121 size,
160- primitive_null_density ,
122+ null_density ,
161123 ) ) ,
162124 Date64 => Arc :: new ( create_random_temporal_array :: < Date64Type > (
163125 size,
164- primitive_null_density ,
126+ null_density ,
165127 ) ) ,
166128 Time32 ( unit) => match unit {
167129 TimeUnit :: Second => Arc :: new ( create_random_temporal_array :: < Time32SecondType > (
168130 size,
169- primitive_null_density ,
131+ null_density ,
170132 ) ) as ArrayRef ,
171133 TimeUnit :: Millisecond => Arc :: new (
172- create_random_temporal_array :: < Time32MillisecondType > ( size, primitive_null_density ) ,
134+ create_random_temporal_array :: < Time32MillisecondType > ( size, null_density ) ,
173135 ) ,
174136 _ => {
175137 return Err ( ArrowError :: InvalidArgumentError ( format ! (
@@ -179,36 +141,31 @@ pub fn create_random_array(
179141 } ,
180142 Time64 ( unit) => match unit {
181143 TimeUnit :: Microsecond => Arc :: new (
182- create_random_temporal_array :: < Time64MicrosecondType > ( size, primitive_null_density ) ,
144+ create_random_temporal_array :: < Time64MicrosecondType > ( size, null_density ) ,
183145 ) as ArrayRef ,
184146 TimeUnit :: Nanosecond => Arc :: new ( create_random_temporal_array :: < Time64NanosecondType > (
185147 size,
186- primitive_null_density ,
148+ null_density ,
187149 ) ) ,
188150 _ => {
189151 return Err ( ArrowError :: InvalidArgumentError ( format ! (
190152 "Unsupported unit {unit:?} for Time64"
191153 ) ) ) ;
192154 }
193155 } ,
194- Utf8 => Arc :: new ( create_string_array :: < i32 > ( size, primitive_null_density ) ) ,
195- LargeUtf8 => Arc :: new ( create_string_array :: < i64 > ( size, primitive_null_density ) ) ,
156+ Utf8 => Arc :: new ( create_string_array :: < i32 > ( size, null_density ) ) ,
157+ LargeUtf8 => Arc :: new ( create_string_array :: < i64 > ( size, null_density ) ) ,
196158 Utf8View => Arc :: new ( create_string_view_array_with_len (
197159 size,
198- primitive_null_density ,
160+ null_density ,
199161 4 ,
200162 false ,
201163 ) ) ,
202- Binary => Arc :: new ( create_binary_array :: < i32 > ( size, primitive_null_density) ) ,
203- LargeBinary => Arc :: new ( create_binary_array :: < i64 > ( size, primitive_null_density) ) ,
204- FixedSizeBinary ( len) => Arc :: new ( create_fsb_array (
205- size,
206- primitive_null_density,
207- * len as usize ,
208- ) ) ,
164+ Binary => Arc :: new ( create_binary_array :: < i32 > ( size, null_density) ) ,
165+ LargeBinary => Arc :: new ( create_binary_array :: < i64 > ( size, null_density) ) ,
166+ FixedSizeBinary ( len) => Arc :: new ( create_fsb_array ( size, null_density, * len as usize ) ) ,
209167 BinaryView => Arc :: new (
210- create_string_view_array_with_len ( size, primitive_null_density, 4 , false )
211- . to_binary_view ( ) ,
168+ create_string_view_array_with_len ( size, null_density, 4 , false ) . to_binary_view ( ) ,
212169 ) ,
213170 List ( _) => create_random_list_array ( field, size, null_density, true_density) ?,
214171 LargeList ( _) => create_random_list_array ( field, size, null_density, true_density) ?,
@@ -230,7 +187,13 @@ pub fn create_random_array(
230187 "Generating random arrays not yet implemented for {other:?}"
231188 ) ) ) ;
232189 }
233- } )
190+ } ;
191+
192+ if !field. is_nullable ( ) {
193+ assert_eq ! ( array. null_count( ) , 0 ) ;
194+ }
195+
196+ Ok ( array)
234197}
235198
236199#[ inline]
@@ -812,4 +775,23 @@ mod tests {
812775 assert_eq ! ( array. len( ) , size) ;
813776 }
814777 }
778+
779+ #[ test]
780+ fn create_non_nullable_decimal_array_with_null_density ( ) {
781+ let size = 10 ;
782+ let fields = vec ! [
783+ Field :: new( "a" , DataType :: Decimal128 ( 10 , -2 ) , false ) ,
784+ Field :: new( "b" , DataType :: Decimal256 ( 10 , -2 ) , false ) ,
785+ ] ;
786+ let schema = Schema :: new ( fields) ;
787+ let schema_ref = Arc :: new ( schema) ;
788+ let batch = create_random_batch ( schema_ref. clone ( ) , size, 0.35 , 0.7 ) . unwrap ( ) ;
789+
790+ assert_eq ! ( batch. schema( ) , schema_ref) ;
791+ assert_eq ! ( batch. num_columns( ) , schema_ref. fields( ) . len( ) ) ;
792+ for array in batch. columns ( ) {
793+ assert_eq ! ( array. len( ) , size) ;
794+ assert_eq ! ( array. null_count( ) , 0 ) ;
795+ }
796+ }
815797}
0 commit comments