Skip to content

Commit 0076f55

Browse files
committed
Infer shredding for parquet
1 parent 686d844 commit 0076f55

7 files changed

Lines changed: 1202 additions & 1 deletion

File tree

common/variant/src/main/java/org/apache/spark/types/variant/Variant.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,11 @@ public BigDecimal getDecimal() {
9999
return VariantUtil.getDecimal(value, pos);
100100
}
101101

102+
// Get the decimal value, including trailing zeros
103+
public BigDecimal getDecimalWithOriginalScale() {
104+
return VariantUtil.getDecimalWithOriginalScale(value, pos);
105+
}
106+
102107
// Get a float value from the variant.
103108
public float getFloat() {
104109
return VariantUtil.getFloat(value, pos);

sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5343,6 +5343,31 @@ object SQLConf {
53435343
.stringConf
53445344
.createWithDefault("")
53455345

5346+
// Upper bound on the number of shredded fields produced while inferring a
// shredding schema for a Variant column; caps schema size for wide values.
val VARIANT_SHREDDING_MAX_SCHEMA_WIDTH =
  buildConf("spark.sql.variant.shredding.maxSchemaWidth")
    .internal()
    .doc("Maximum number of shredded fields to create when inferring a schema for Variant")
    .version("4.2.0")
    .intConf
    .createWithDefault(300)
5354+
// Depth cutoff for schema inference: nesting beyond this many levels is not
// traversed, and such arrays/objects are shredded as opaque binary.
val VARIANT_SHREDDING_MAX_SCHEMA_DEPTH =
  buildConf("spark.sql.variant.shredding.maxSchemaDepth")
    .internal()
    .doc("Maximum depth in Variant value to traverse when inferring a schema. " +
      "Any array/object below this depth will be shredded as a single binary.")
    .version("4.2.0")
    .intConf
    .createWithDefault(50)
5363+
// Feature flag: when enabled, a shredding schema is inferred for Variant
// columns written to Parquet. Disabled by default.
val VARIANT_INFER_SHREDDING_SCHEMA =
  buildConf("spark.sql.variant.inferShreddingSchema")
    .internal()
    .doc("Infer shredding schema when writing Variant columns in Parquet tables.")
    .version("4.2.0")
    .booleanConf
    .createWithDefault(false)
53465371
val LEGACY_CSV_ENABLE_DATE_TIME_PARSING_FALLBACK =
53475372
buildConf("spark.sql.legacy.csv.enableDateTimeParsingFallback")
53485373
.internal()

0 commit comments

Comments
 (0)