Skip to content

Commit 39da29f

Browse files
Jefffrey and alamb authored
Add ScalarValue::RunEndEncoded variant (#19895)
## Which issue does this PR close? <!-- We generally require a GitHub issue to be filed for all bug fixes and enhancements and this helps us generate change logs for our releases. You can link an issue to this PR using the GitHub syntax. For example `Closes #123` indicates that this PR will close issue #123. --> - Closes #18563 ## Rationale for this change <!-- Why are you proposing this change? If this is already explained clearly in the issue then this section is not needed. Explaining clearly why changes are proposed helps reviewers understand your changes and offer better suggestions for fixes. --> Support RunEndEncoded scalar values, similar to how we support Dictionary. ## What changes are included in this PR? <!-- There is no need to duplicate the description in the issue here but it is sometimes worth providing a summary of the individual changes in this PR. --> - Add new `ScalarValue::RunEndEncoded` enum variant - Fix `ScalarValue::new_default` to support `Decimal32` and `Decimal64` - Support RunEndEncoded type in proto for both `ScalarValue` message and `ArrowType` message ## Are these changes tested? <!-- We typically require tests for all PRs in order to: 1. Prevent the code from being accidentally broken by subsequent changes 2. Serve as another way to document the expected behavior of the code If tests are not included in your PR, please explain why (for example, are they covered by existing tests)? --> Added tests. ## Are there any user-facing changes? <!-- If there are user-facing changes then we may require documentation to be updated before approving the PR. --> - New variant for `ScalarValue` - Protobuf changes to support RunEndEncoded type <!-- If there are any breaking changes to public APIs, please add the `api change` label. --> --------- Co-authored-by: Andrew Lamb <andrew@nerdnetworks.org>
1 parent dab903e commit 39da29f

File tree

10 files changed

+906
-51
lines changed

10 files changed

+906
-51
lines changed

datafusion/common/src/cast.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,9 @@ use arrow::array::{
2525
BinaryViewArray, Decimal32Array, Decimal64Array, DurationMicrosecondArray,
2626
DurationMillisecondArray, DurationNanosecondArray, DurationSecondArray, Float16Array,
2727
Int8Array, Int16Array, LargeBinaryArray, LargeListViewArray, LargeStringArray,
28-
ListViewArray, StringViewArray, UInt16Array,
28+
ListViewArray, RunArray, StringViewArray, UInt16Array,
2929
};
30+
use arrow::datatypes::RunEndIndexType;
3031
use arrow::{
3132
array::{
3233
Array, BinaryArray, BooleanArray, Date32Array, Date64Array, Decimal128Array,
@@ -334,3 +335,8 @@ pub fn as_list_view_array(array: &dyn Array) -> Result<&ListViewArray> {
334335
pub fn as_large_list_view_array(array: &dyn Array) -> Result<&LargeListViewArray> {
335336
Ok(downcast_value!(array, LargeListViewArray))
336337
}
338+
339+
// Downcast Array to RunArray
//
// Takes a type-erased `&dyn Array` and returns it as a `&RunArray<T>`, where
// `T` is the run-end index type chosen by the caller.
// The conversion is delegated to the `downcast_value!` macro.
// NOTE(review): presumably a failed downcast surfaces as an `Err` rather than
// a panic -- confirm against the `downcast_value!` definition.
340+
pub fn as_run_array<T: RunEndIndexType>(array: &dyn Array) -> Result<&RunArray<T>> {
341+
Ok(downcast_value!(array, RunArray, T))
342+
}

datafusion/common/src/scalar/mod.rs

Lines changed: 445 additions & 13 deletions
Large diffs are not rendered by default.

datafusion/proto-common/proto/datafusion_common.proto

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,11 @@ message Map {
183183
bool keys_sorted = 2;
184184
}
185185

186+
// Arrow run-end-encoded data type, described by its two child fields.
// The decoder for `ArrowType` reads both fields as required and builds
// `DataType::RunEndEncoded(run_ends_field, values_field)` from them.
message RunEndEncoded {
187+
// Field describing the run-ends child.
Field run_ends_field = 1;
188+
// Field describing the values child.
Field values_field = 2;
189+
}
190+
186191
enum UnionMode{
187192
sparse = 0;
188193
dense = 1;
@@ -236,6 +241,12 @@ message ScalarDictionaryValue {
236241
ScalarValue value = 2;
237242
}
238243

244+
// Scalar form of a run-end-encoded value: the two child fields of the
// run-end-encoded type plus the single wrapped scalar.
// The `ScalarValue` decoder returns a "required" error if any of the three
// members is missing, and otherwise produces
// `ScalarValue::RunEndEncoded(run_ends_field, values_field, value)`.
message ScalarRunEndEncodedValue {
245+
// Field describing the run-ends child.
Field run_ends_field = 1;
246+
// Field describing the values child.
Field values_field = 2;
247+
// The wrapped scalar value (decoded recursively as a `ScalarValue`).
ScalarValue value = 3;
248+
}
249+
239250
message IntervalDayTimeValue {
240251
int32 days = 1;
241252
int32 milliseconds = 2;
@@ -321,6 +332,8 @@ message ScalarValue{
321332
IntervalMonthDayNanoValue interval_month_day_nano = 31;
322333
ScalarFixedSizeBinary fixed_size_binary_value = 34;
323334
UnionValue union_value = 42;
335+
336+
ScalarRunEndEncodedValue run_end_encoded_value = 45;
324337
}
325338
}
326339

@@ -389,6 +402,7 @@ message ArrowType{
389402
Union UNION = 29;
390403
Dictionary DICTIONARY = 30;
391404
Map MAP = 33;
405+
RunEndEncoded RUN_END_ENCODED = 42;
392406
}
393407
}
394408

datafusion/proto-common/src/from_proto/mod.rs

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,19 @@ impl TryFrom<&protobuf::arrow_type::ArrowTypeEnum> for DataType {
326326
let keys_sorted = map.keys_sorted;
327327
DataType::Map(Arc::new(field), keys_sorted)
328328
}
329+
arrow_type::ArrowTypeEnum::RunEndEncoded(run_end_encoded) => {
330+
let run_ends_field: Field = run_end_encoded
331+
.as_ref()
332+
.run_ends_field
333+
.as_deref()
334+
.required("run_ends_field")?;
335+
let value_field: Field = run_end_encoded
336+
.as_ref()
337+
.values_field
338+
.as_deref()
339+
.required("values_field")?;
340+
DataType::RunEndEncoded(run_ends_field.into(), value_field.into())
341+
}
329342
})
330343
}
331344
}
@@ -578,6 +591,32 @@ impl TryFrom<&protobuf::ScalarValue> for ScalarValue {
578591

579592
Self::Dictionary(Box::new(index_type), Box::new(value))
580593
}
594+
Value::RunEndEncodedValue(v) => {
595+
let run_ends_field: Field = v
596+
.run_ends_field
597+
.as_ref()
598+
.ok_or_else(|| Error::required("run_ends_field"))?
599+
.try_into()?;
600+
601+
let values_field: Field = v
602+
.values_field
603+
.as_ref()
604+
.ok_or_else(|| Error::required("values_field"))?
605+
.try_into()?;
606+
607+
let value: Self = v
608+
.value
609+
.as_ref()
610+
.ok_or_else(|| Error::required("value"))?
611+
.as_ref()
612+
.try_into()?;
613+
614+
Self::RunEndEncoded(
615+
run_ends_field.into(),
616+
values_field.into(),
617+
Box::new(value),
618+
)
619+
}
581620
Value::BinaryValue(v) => Self::Binary(Some(v.clone())),
582621
Value::BinaryViewValue(v) => Self::BinaryView(Some(v.clone())),
583622
Value::LargeBinaryValue(v) => Self::LargeBinary(Some(v.clone())),

0 commit comments

Comments
 (0)