-
Notifications
You must be signed in to change notification settings - Fork 23
Description
I am getting a serialization error in spark-shell when I run the code below, which is taken from your program https://github.com/ChitturiPadma/SparkforDataScienceCookbook/blob/master/chapter6-NLP-with-Spark/src/main/scala/com/packt/chapter6/Chunker_Demo
// Build an RDD of three sample sentences; the second argument (1) asks for a single partition.
val textInput = sc.makeRDD(Array("I am Padma working in Fractal Analytics Company","I am a big data enthusiast",
"I love cooking"),1)
// Load the OpenNLP POS model from a relative path and build a tagger from it.
val modelFile = new File("en-pos-maxent.bin")
//val chunkerModelFile = new File("/home/padmac/opennlp_models/enchunker.bin")
val model = new POSModelLoader().load(modelFile)
val tagger = new POSTaggerME(model)
// Wrap the tagger in a broadcast variable; the map closure below reads it through .value.
val broadCastedTagger = sc.broadcast(tagger)
// NOTE(review): the stack trace that follows this snippet shows RDD.map failing in ClosureCleaner with
// NotSerializableException for opennlp.tools.postag.POSModel. Presumably the spark-shell closure pulls in
// the enclosing REPL scope that holds `model` (and the broadcast tagger is not serializable either) —
// confirm against the full serialization stack.
val resultsAndSpan = textInput.map{sentence =>
// Split the sentence on whitespace and pass the resulting tokens to the tagger.
val tokenizedLines =
WhitespaceTokenizer.INSTANCE.tokenize(sentence)
val tags = broadCastedTagger.value.tag(tokenizedLines)
// Prints every tag; since foreach is the closure's last expression, each mapped element is Unit.
tags.foreach(println)
}
// NOTE(review): this closing brace has no matching opener in the snippet — presumably a leftover from the
// enclosing method of the original program.
}
org.apache.spark.SparkException: Task not serializable
at org.apache.spark.util.ClosureCleaner$.ensureSerializable(ClosureCleaner.scala:298)
at org.apache.spark.util.ClosureCleaner$.org$apache$spark$util$ClosureCleaner$$clean(ClosureCleaner.scala:288)
at org.apache.spark.util.ClosureCleaner$.clean(ClosureCleaner.scala:108)
at org.apache.spark.SparkContext.clean(SparkContext.scala:2101)
at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:370)
at org.apache.spark.rdd.RDD$$anonfun$map$1.apply(RDD.scala:369)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
at org.apache.spark.rdd.RDD.map(RDD.scala:369)
... 54 elided
Caused by: java.io.NotSerializableException: opennlp.tools.postag.POSModel
Serialization stack: