Skip to content

org.apache.spark.SparkException: Failed to execute user defined function (Ray$$Lambda$6471/213060338: (array<string>) => array<string>) #1927

@zhouenxian

Description

@zhouenxian

My program has a bug that has been bothering me for a long time. Here is my code:
`
-- Convert the model into a prediction function so that we can easily inspect the original model's output.
-- Python-side settings applied to the statements below.
-- NOTE(review): the meaning of each key is inferred from its name (Ray address, Python interpreter path,
-- data mode, run location, output schema) — confirm against the Byzer-LLM documentation.
!python conf "rayAddress=localhost:10001";
!python conf "pythonExec=/home/byzerllm/miniconda3/envs/byzerllm-dev/bin/python";
!python conf "dataMode=model";
!python conf "runIn=driver";
!python conf "schema=st(field(value,string))";

-- Resource/concurrency settings (presumably one GPU and a single concurrent worker — TODO confirm).
!python conf "num_gpus=1";
!python conf "maxConcurrency=1";
!python conf "standalone=true";

-- Invoke the LLM extension with action "infer" on the local chatglm2-6b checkpoint.
-- The name given in udfName is the function called by the final select statement of this script.
run command as LLM.`` where
action="infer"
-- chatglm/moss
and pretrainedModelType="custom/chatglm2"
and localModelDir="/home/byzerllm/DL/checkpoint/chatglm2-6b"

-- Presumably a readiness timeout and 4-bit quantization switches — verify units and accepted values.
and modelWaitServerReadyTimeout="300"
and quantizationBit="true"
and quantizationBitNum="4"
and udfName="chatglm2_origin_model_predict"
and modelTable="command";

-- Build a one-row table named testData whose `value` column holds a JSON string
-- with an "instruction" field and an "output" field.
select '{
"instruction":"类型#裤版型#宽松风格#性感图案#线条裤型#阔腿裤",
"output":"NAN"
}' as value as testData;

-- Call the function on a one-element array built from `value` and name the result table finalResult.
-- NOTE(review): presumably this is the statement that triggers the UDF failure shown in the console output below.
select chatglm2_origin_model_predict(array(value)) as r from testData as finalResult;
`
This code follows the example provided on the official website, but it fails at runtime. Below is the message printed to the console:

`
Job aborted due to stage failure: Task 0 in stage 737.0 failed 1 times, most recent failure: Lost task 0.0 in stage 737.0 (TID 6393) (192.168.1.230 executor driver): org.apache.spark.SparkException: Failed to execute user defined function (Ray$$Lambda$6471/213060338: (array<string>) => array<string>)

Job aborted due to stage failure: Task 0 in stage 737.0 failed 1 times, most recent failure: Lost task 0.0 in stage 737.0 (TID 6393) (192.168.1.230 executor driver): org.apache.spark.SparkException: Failed to execute user defined function (Ray$$Lambda$6471/213060338: (array<string>) => array<string>)
at org.apache.spark.sql.errors.QueryExecutionErrors$.failedExecuteUserDefinedFunctionError(QueryExecutionErrors.scala:177)
at org.apache.spark.sql.errors.QueryExecutionErrors.failedExecuteUserDefinedFunctionError(QueryExecutionErrors.scala)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.project_doConsume_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:760)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
at org.apache.spark.sql.Dataset$$anon$1.hasNext(Dataset.scala:3589)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:760)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:364)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:890)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:890)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:329)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:136)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)
Caused by: java.util.NoSuchElementException: head of empty list
at scala.collection.immutable.Nil$.head(List.scala:469)
at scala.collection.immutable.Nil$.head(List.scala:466)
at tech.mlsql.ets.Ray.$anonfun$predict$7(Ray.scala:373)
... 25 more
Driver stacktrace:
org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 737.0 failed 1 times, most recent failure: Lost task 0.0 in stage 737.0 (TID 6393) (192.168.1.230 executor driver): org.apache.spark.SparkException: Failed to execute user defined function (Ray$$Lambda$6471/213060338: (array<string>) => array<string>)
at org.apache.spark.sql.errors.QueryExecutionErrors$.failedExecuteUserDefinedFunctionError(QueryExecutionErrors.scala:177)
at org.apache.spark.sql.errors.QueryExecutionErrors.failedExecuteUserDefinedFunctionError(QueryExecutionErrors.scala)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.project_doConsume_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:760)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
at org.apache.spark.sql.Dataset$$anon$1.hasNext(Dataset.scala:3589)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:760)
at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:364)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:890)
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:890)
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)
at org.apache.spark.rdd.RDD.iterator(RDD.scala:329)
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
at org.apache.spark.scheduler.Task.run(Task.scala:136)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)
at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
at java.lang.Thread.run(Thread.java:750)
Caused by: java.util.NoSuchElementException: head of empty list
at scala.collection.immutable.Nil$.head(List.scala:469)
at scala.collection.immutable.Nil$.head(List.scala:466)
at tech.mlsql.ets.Ray.$anonfun$predict$7(Ray.scala:373)
... 25 more
Driver stacktrace:
org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2672)
org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2608)
org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2607)
scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2607)
org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1182)
org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1182)
scala.Option.foreach(Option.scala:407)
org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1182)
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2860)
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2802)
org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2791)
org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:952)
org.apache.spark.SparkContext.runJob(SparkContext.scala:2228)
org.apache.spark.SparkContext.runJob(SparkContext.scala:2249)
org.apache.spark.SparkContext.runJob(SparkContext.scala:2268)
org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:506)
org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:459)
org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:48)
org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:3868)
org.apache.spark.sql.Dataset.$anonfun$head$1(Dataset.scala:2863)
org.apache.spark.sql.Dataset.$anonfun$withAction$2(Dataset.scala:3858)
org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:510)
org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3856)
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:109)
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:169)
org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:95)
org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:779)
org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
org.apache.spark.sql.Dataset.withAction(Dataset.scala:3856)
org.apache.spark.sql.Dataset.head(Dataset.scala:2863)
org.apache.spark.sql.Dataset.take(Dataset.scala:3084)
streaming.rest.RestController.getScriptResult(RestController.scala:288)
streaming.rest.RestController.$anonfun$script$1(RestController.scala:163)
tech.mlsql.job.JobManager$.run(JobManager.scala:74)
tech.mlsql.job.JobManager$$anon$1.run(JobManager.scala:91)
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
java.lang.Thread.run(Thread.java:750)
caused by:
org.apache.spark.SparkException: Failed to execute user defined function (Ray$$Lambda$6471/213060338: (array<string>) => array<string>)
org.apache.spark.sql.errors.QueryExecutionErrors$.failedExecuteUserDefinedFunctionError(QueryExecutionErrors.scala:177)
org.apache.spark.sql.errors.QueryExecutionErrors.failedExecuteUserDefinedFunctionError(QueryExecutionErrors.scala)
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.project_doConsume_0$(Unknown Source)
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:760)
scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
org.apache.spark.sql.Dataset$$anon$1.hasNext(Dataset.scala:3589)
scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:760)
org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:364)
org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:890)
org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:890)
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)
org.apache.spark.rdd.RDD.iterator(RDD.scala:329)
org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
org.apache.spark.scheduler.Task.run(Task.scala:136)
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)
org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
java.lang.Thread.run(Thread.java:750)
caused by:
java.util.NoSuchElementException: head of empty list
scala.collection.immutable.Nil$.head(List.scala:469)
scala.collection.immutable.Nil$.head(List.scala:466)
tech.mlsql.ets.Ray.$anonfun$predict$7(Ray.scala:373)
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.project_doConsume_0$(Unknown Source)
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:760)
scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
org.apache.spark.sql.Dataset$$anon$1.hasNext(Dataset.scala:3589)
scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:760)
org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:364)
org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:890)
org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:890)
org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365)
org.apache.spark.rdd.RDD.iterator(RDD.scala:329)
org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
org.apache.spark.scheduler.Task.run(Task.scala:136)
org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548)
org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504)
org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551)
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
java.lang.Thread.run(Thread.java:750)
`

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions