-
Notifications
You must be signed in to change notification settings - Fork 582
Open
Labels
enhancementNew feature or requestNew feature or request
Description
Description
There is still a columnar-to-row (C2R) conversion in the Parquet write stage when we create a partitioned Delta table, for example:
df.write.mode("overwrite").format("delta").option("compression", "zstd").partitionBy(tbl_filenum[tbl]['part']).option("optimizeWrite", "True").save(f"s3a://presto-workload/{databasename}/{tbl}")
The C2R conversion shows up in the following call stack:
app//org.apache.gluten.vectorized.NativeColumnarToRowJniWrapper.nativeColumnarToRowConvert(Native Method)
app//org.apache.gluten.execution.VeloxColumnarToRowExec$Converter$$anon$2.next(VeloxColumnarToRowExec.scala:193)
app//org.apache.gluten.execution.VeloxColumnarToRowExec$Converter$$anon$2.next(VeloxColumnarToRowExec.scala:169)
app//scala.collection.Iterator.foreach(Iterator.scala:943)
app//scala.collection.Iterator.foreach$(Iterator.scala:943)
app//org.apache.gluten.execution.VeloxColumnarToRowExec$Converter$$anon$2.foreach(VeloxColumnarToRowExec.scala:169)
app//org.apache.spark.sql.delta.stats.GlutenDeltaJobStatisticsTracker$GlutenDeltaTaskStatisticsTracker.newRow(GlutenDeltaWriteJobStatsTracker.scala:73)
app//org.apache.spark.sql.delta.files.GlutenDeltaFileFormatWriter$ColumnarDynamicPartitionDataSingleWriter.$anonfun$write$11(GlutenDeltaFileFormatWriter.scala:596)
app//org.apache.spark.sql.delta.files.GlutenDeltaFileFormatWriter$ColumnarDynamicPartitionDataSingleWriter.$anonfun$write$11$adapted(GlutenDeltaFileFormatWriter.scala:595)
app//org.apache.spark.sql.delta.files.GlutenDeltaFileFormatWriter$ColumnarDynamicPartitionDataSingleWriter$$Lambda$2037/0x00000008018bc318.apply(Unknown Source)
app//scala.collection.immutable.List.foreach(List.scala:431)
app//org.apache.spark.sql.delta.files.GlutenDeltaFileFormatWriter$ColumnarDynamicPartitionDataSingleWriter.write(GlutenDeltaFileFormatWriter.scala:595)
app//org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithMetrics(FileFormatDataWriter.scala:85)
app//org.apache.spark.sql.execution.datasources.FileFormatDataWriter.writeWithIterator(FileFormatDataWriter.scala:92)
app//org.apache.spark.sql.delta.files.GlutenDeltaFileFormatWriter$.$anonfun$executeTask$1(GlutenDeltaFileFormatWriter.scala:478)
app//org.apache.spark.sql.delta.files.GlutenDeltaFileFormatWriter$$$Lambda$1916/0x00000008018877f0.apply(Unknown Source)
app//org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1397)
app//org.apache.spark.sql.delta.files.GlutenDeltaFileFormatWriter$.executeTask(GlutenDeltaFileFormatWriter.scala:485)
app//org.apache.spark.sql.delta.files.GlutenDeltaFileFormatWriter$.$anonfun$executeWrite$4(GlutenDeltaFileFormatWriter.scala:313)
app//org.apache.spark.sql.delta.files.GlutenDeltaFileFormatWriter$$$Lambda$1553/0x00000008016bd7f0.apply(Unknown Source)
app//org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
app//org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
app//org.apache.spark.scheduler.Task.run(Task.scala:141)
app//org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
app//org.apache.spark.executor.Executor$TaskRunner$$Lambda$936/0x00000008012177d8.apply(Unknown Source)
app//org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
app//org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
app//org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
app//org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
java.base@17.0.1/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
java.base@17.0.1/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
java.base@17.0.1/java.lang.Thread.run(Thread.java:833)
Gluten version
None
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
enhancementNew feature or requestNew feature or request
Type
Projects
Status
No status