-
Notifications
You must be signed in to change notification settings - Fork 26
Hang issue encountered when running TPC-DS with plasma cache. #47
Description
For OAP-1.1, we ran the TPC-DS 99 queries as a Power Test and encountered a hang issue when running q66.sql. The issue was reproduced on 2 clusters. The relevant stack trace is shown below:
org.apache.arrow.plasma.PlasmaClientJNI.contains(Native Method)
org.apache.arrow.plasma.PlasmaClient.contains(PlasmaClient.java:182)
org.apache.spark.sql.execution.datasources.oap.filecache.ExternalCache.contains(OapCache.scala:1059)
org.apache.spark.sql.execution.datasources.oap.filecache.ExternalCache.get(OapCache.scala:1066)
org.apache.spark.sql.execution.datasources.oap.filecache.FiberCacheManager.get(FiberCacheManager.scala:114)
org.apache.spark.sql.execution.datasources.oap.io.VectorizedCacheReader.$anonfun$readNextRowGroup$3(VectorizedCacheReader.scala:185)
org.apache.spark.sql.execution.datasources.oap.io.VectorizedCacheReader$$Lambda$2759/1238423103.apply(Unknown Source)
scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238)
scala.collection.TraversableLike$$Lambda$17/1699113578.apply(Unknown Source)
scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36)
scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33)
scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198)
scala.collection.TraversableLike.map(TraversableLike.scala:238)
scala.collection.TraversableLike.map$(TraversableLike.scala:231)
scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:198)
org.apache.spark.sql.execution.datasources.oap.io.VectorizedCacheReader.readNextRowGroup(VectorizedCacheReader.scala:177)
org.apache.spark.sql.execution.datasources.oap.io.VectorizedCacheReader.checkEndOfRowGroup(VectorizedCacheReader.scala:152)
org.apache.spark.sql.execution.datasources.oap.io.VectorizedCacheReader.nextBatch(VectorizedCacheReader.scala:129)
org.apache.spark.sql.execution.datasources.oap.io.VectorizedCacheReader.nextKeyValue(VectorizedCacheReader.scala:100)
org.apache.spark.sql.execution.datasources.oap.io.FileRecordReaderIterator.hasNext(DataFile.scala:66)
org.apache.spark.sql.execution.datasources.oap.io.OapCompletionIterator.hasNext(DataFile.scala:103)
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:93)
org.apache.spark.sql.execution.OapFileSourceScanExec$$anon$1.hasNext(OapFileSourceScanExec.scala:393)
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage11.columnartorow_nextBatch_0$(Unknown Source)
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage11.agg_doAggregateWithKeys_0$(Unknown Source)
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage11.processNext(Unknown Source)
org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:729)
scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:132)
org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)
org.apache.spark.scheduler.Task.run(Task.scala:127)
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:444)
org.apache.spark.executor.Executor$TaskRunner$$Lambda$479/936964206.apply(Unknown Source)
org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377)
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:447)
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
java.lang.Thread.run(Thread.java:745)