@@ -18,10 +18,9 @@ package org.apache.gluten.execution
 
 import org.apache.gluten.config.{GlutenConfig, GlutenCoreConfig, VeloxConfig}
 import org.apache.gluten.expression.VeloxDummyExpression
-import org.apache.gluten.sql.shims.SparkShimLoader
 
 import org.apache.spark.SparkConf
-import org.apache.spark.sql.{AnalysisException, DataFrame, Row}
+import org.apache.spark.sql.{DataFrame, Row}
 import org.apache.spark.sql.execution._
 import org.apache.spark.sql.execution.adaptive.{AdaptiveSparkPlanHelper, AQEShuffleReadExec, ShuffleQueryStageExec}
 import org.apache.spark.sql.execution.joins.BaseJoinExec
@@ -911,18 +910,9 @@ class MiscOperatorSuite extends VeloxWholeStageTransformerSuite with AdaptiveSpa

test("Verify parquet field name with special character") {
withTable("t") {

// https://github.com/apache/spark/pull/35229 Spark remove parquet field name check after 3.2
if (!SparkShimLoader.getSparkVersion.startsWith("3.2")) {
sql("create table t using parquet as select sum(l_partkey) from lineitem")
runQueryAndCompare("select * from t") {
checkGlutenPlan[FileSourceScanExecTransformer]
}
} else {
val msg = intercept[AnalysisException] {
sql("create table t using parquet as select sum(l_partkey) from lineitem")
}.message
assert(msg.contains("contains invalid character"))
sql("create table t using parquet as select sum(l_partkey) from lineitem")
runQueryAndCompare("select * from t") {
checkGlutenPlan[FileSourceScanExecTransformer]
}
}
}
Expand Down
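Note: per the upstream PR cited in the removed comment (apache/spark#35229), Spark stopped rejecting special characters in parquet field names after 3.2. On every Spark version Gluten still supports, the CTAS statement therefore succeeds, the AnalysisException branch was dead code, and the test collapses to the happy path.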
@@ -45,7 +45,7 @@ import org.apache.spark.sql.execution.SparkPlan
 trait GlutenPlan
   extends SparkPlan
   with Convention.KnownBatchType
-  with Convention.KnownRowTypeForSpark33OrLater
+  with Convention.KnownRowTypeWithDefault
   with GlutenPlan.SupportsRowBasedCompatible
   with ConventionReq.KnownChildConvention {
 
@@ -35,7 +35,7 @@ import java.util.concurrent.atomic.AtomicBoolean
 case class GroupLeafExec(groupId: Int, metadata: GlutenMetadata, convReq: Conv.Req)
   extends LeafExecNode
   with Convention.KnownBatchType
-  with Convention.KnownRowTypeForSpark33OrLater
+  with Convention.KnownRowTypeWithDefault
   with GlutenPlan.SupportsRowBasedCompatible {
 
   private val frozen = new AtomicBoolean(false)
@@ -17,7 +17,6 @@
 package org.apache.gluten.extension.columnar.transition
 
 import org.apache.spark.sql.execution.{ColumnarToRowExec, RowToColumnarExec, SparkPlan}
-import org.apache.spark.util.SparkVersionUtil
 
 import scala.collection.mutable
 
@@ -167,24 +166,11 @@ object Convention {
     def rowType(): RowType
   }
 
-  trait KnownRowTypeForSpark33OrLater extends KnownRowType {
+  trait KnownRowTypeWithDefault extends KnownRowType {
     this: SparkPlan =>
 
     final override def rowType(): RowType = {
-      if (SparkVersionUtil.lteSpark32) {
-        // It's known that in Spark 3.2, one Spark plan node is considered either only having
-        // row-based support or only having columnar support at a time.
-        // Hence, if the plan supports columnar output, we'd disable its row-based support.
-        // The same for the opposite.
-        if (supportsColumnar) {
-          Convention.RowType.None
-        } else {
-          assert(rowType0() != Convention.RowType.None)
-          rowType0()
-        }
-      } else {
-        rowType0()
-      }
+      rowType0()
     }
 
     def rowType0(): RowType
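Note: the old name KnownRowTypeForSpark33OrLater encoded the removed special case: on Spark 3.2 a plan node was treated as either row-based or columnar, never both, so rowType() had to be masked for columnar nodes. With 3.2 gone the trait simply supplies a default rowType() that forwards to rowType0(), which the new name captures. A hypothetical implementor might now look like the sketch below (ExampleColumnarOp and its stubs are illustrative only, not part of this patch):

import org.apache.gluten.extension.columnar.transition.Convention
import org.apache.spark.rdd.RDD
import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.execution.{SparkPlan, UnaryExecNode}
import org.apache.spark.sql.vectorized.ColumnarBatch

// Sketch: a columnar-only operator decides only rowType0(); the trait's
// final rowType() forwards to it unconditionally on every supported version.
case class ExampleColumnarOp(child: SparkPlan) extends UnaryExecNode
  with Convention.KnownRowTypeWithDefault {
  override def supportsColumnar: Boolean = true
  // No row-based output: report RowType.None directly, no 3.2 masking needed.
  override def rowType0(): Convention.RowType = Convention.RowType.None
  override def output: Seq[Attribute] = child.output
  override protected def doExecuteColumnar(): RDD[ColumnarBatch] =
    child.executeColumnar()
  override protected def doExecute(): RDD[InternalRow] =
    throw new UnsupportedOperationException("columnar only")
  override protected def withNewChildInternal(newChild: SparkPlan): SparkPlan =
    copy(child = newChild)
}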
@@ -20,7 +20,7 @@ import org.apache.gluten.config.GlutenCoreConfig
 
 import org.apache.spark.sql.catalyst.SQLConfHelper
 import org.apache.spark.sql.execution.SparkPlan
-import org.apache.spark.util.{SparkVersionUtil, Utils}
+import org.apache.spark.util.Utils
 
 /**
  * This [[CostEvaluator]] is to force use the new physical plan when cost is equal.
@@ -30,16 +30,11 @@ import org.apache.spark.util.{SparkVersionUtil, Utils}
 case class GlutenCostEvaluator() extends CostEvaluator with SQLConfHelper {
 
   private val vanillaCostEvaluator: CostEvaluator = {
-    if (SparkVersionUtil.lteSpark32) {
-      val clazz = Utils.classForName("org.apache.spark.sql.execution.adaptive.SimpleCostEvaluator$")
-      clazz.getDeclaredField("MODULE$").get(null).asInstanceOf[CostEvaluator]
-    } else {
-      val forceOptimizeSkewedJoin =
-        conf.getConfString("spark.sql.adaptive.forceOptimizeSkewedJoin").toBoolean
-      val clazz = Utils.classForName("org.apache.spark.sql.execution.adaptive.SimpleCostEvaluator")
-      val ctor = clazz.getConstructor(classOf[Boolean])
-      ctor.newInstance(forceOptimizeSkewedJoin.asInstanceOf[Object]).asInstanceOf[CostEvaluator]
-    }
+    val forceOptimizeSkewedJoin =
+      conf.getConfString("spark.sql.adaptive.forceOptimizeSkewedJoin").toBoolean
+    val clazz = Utils.classForName("org.apache.spark.sql.execution.adaptive.SimpleCostEvaluator")
+    val ctor = clazz.getConstructor(classOf[Boolean])
+    ctor.newInstance(forceOptimizeSkewedJoin.asInstanceOf[Object]).asInstanceOf[CostEvaluator]
   }
 
   override def evaluateCost(plan: SparkPlan): Cost = {
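Note: the reflection here dates from when SimpleCostEvaluator was a case object on Spark 3.2 (hence the "$"/MODULE$ lookup in the removed branch) and a one-argument case class from 3.3 onward. Only the class shape remains, so a possible follow-up is direct construction. Untested sketch, assuming SimpleCostEvaluator is visible from this class's package on all supported Spark versions; conf comes from SQLConfHelper as in the surrounding class:

import org.apache.spark.sql.execution.adaptive.SimpleCostEvaluator

private val vanillaCostEvaluator: CostEvaluator =
  SimpleCostEvaluator(
    conf.getConfString("spark.sql.adaptive.forceOptimizeSkewedJoin").toBoolean)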
@@ -23,16 +23,12 @@ import org.apache.spark.sql.internal.SQLConf
 object SparkPlanUtil {
 
   def supportsRowBased(plan: SparkPlan): Boolean = {
-    if (SparkVersionUtil.lteSpark32) {
-      return !plan.supportsColumnar
-    }
-
     val m = classOf[SparkPlan].getMethod("supportsRowBased")
     m.invoke(plan).asInstanceOf[Boolean]
   }
 
   def isPlannedV1Write(plan: DataWritingCommandExec): Boolean = {
-    if (SparkVersionUtil.lteSpark33) {
+    if (SparkVersionUtil.eqSpark33) {
       return false
     }
 
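Note: supportsRowBased is looked up reflectively because the method did not exist on SparkPlan before Spark 3.3; with 3.3 as the new minimum it is present everywhere, so this could presumably become a direct call. Sketch only, not part of the patch:

def supportsRowBased(plan: SparkPlan): Boolean = plan.supportsRowBased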
@@ -64,23 +64,6 @@ object SparkTaskUtil
     ctors.head
   }
 
-  if (SparkVersionUtil.lteSpark32) {
-    return ctor
-      .newInstance(
-        stageId,
-        stageAttemptNumber,
-        partitionId,
-        taskAttemptId,
-        attemptNumber,
-        taskMemoryManager,
-        localProperties,
-        metricsSystem,
-        taskMetrics,
-        resources
-      )
-      .asInstanceOf[TaskContext]
-  }
-
   if (SparkVersionUtil.eqSpark33) {
     return ctor
       .newInstance(
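Note: the removed block matched the Spark 3.2 constructor arity of TaskContextImpl; the branches that remain, starting with the eqSpark33 case above, cover the 3.3 and later signatures, which are now the only ones this reflective dispatch needs to handle.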
@@ -17,11 +17,9 @@
 package org.apache.spark.util
 
 object SparkVersionUtil {
-  val lteSpark32: Boolean = compareMajorMinorVersion((3, 2)) <= 0
   private val comparedWithSpark33 = compareMajorMinorVersion((3, 3))
   private val comparedWithSpark35 = compareMajorMinorVersion((3, 5))
   val eqSpark33: Boolean = comparedWithSpark33 == 0
-  val lteSpark33: Boolean = lteSpark32 || eqSpark33
   val gteSpark33: Boolean = comparedWithSpark33 >= 0
   val gteSpark35: Boolean = comparedWithSpark35 >= 0
   val gteSpark40: Boolean = compareMajorMinorVersion((4, 0)) >= 0
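Note: with Spark 3.3 as the minimum supported version, lteSpark32 is always false and lteSpark33 degenerates to eqSpark33, which is exactly the substitution made in SparkPlanUtil.isPlannedV1Write above.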
@@ -188,7 +188,7 @@ case class ColumnarCollapseTransformStages(glutenConf: GlutenConfig) extends Rul
 case class ColumnarInputAdapter(child: SparkPlan)
   extends InputAdapterGenerateTreeStringShim
   with Convention.KnownBatchType
-  with Convention.KnownRowTypeForSpark33OrLater
+  with Convention.KnownRowTypeWithDefault
   with GlutenPlan.SupportsRowBasedCompatible
   with ConventionReq.KnownChildConvention {
   override def output: Seq[Attribute] = child.output

This file was deleted.