From ec7157131afb3cce10e1e2c3c1bcdfc9ca99b44b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20=C5=BBukowski?=
Date: Sun, 23 Dec 2018 01:29:02 +0100
Subject: [PATCH] Size estimation method in JoinOptimizer changed to a more
 widely accepted one.

Use org.apache.spark.util.SizeEstimator instead of
jdk.nashorn.internal.ir.debug.ObjectSizeCalculator, which is supported only
on the HotSpot VM
---
 .../org/biodatageeks/rangejoins/optimizer/JoinOptimizer.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/main/scala/org/biodatageeks/rangejoins/optimizer/JoinOptimizer.scala b/src/main/scala/org/biodatageeks/rangejoins/optimizer/JoinOptimizer.scala
index c7ccb90a..11f9b618 100644
--- a/src/main/scala/org/biodatageeks/rangejoins/optimizer/JoinOptimizer.scala
+++ b/src/main/scala/org/biodatageeks/rangejoins/optimizer/JoinOptimizer.scala
@@ -1,6 +1,6 @@
 package org.biodatageeks.rangejoins.optimizer
 
-import jdk.nashorn.internal.ir.debug.ObjectSizeCalculator
+import org.apache.spark.util.SizeEstimator
 import org.apache.spark.SparkContext
 import org.apache.spark.rdd.RDD
 import org.apache.spark.util.SizeEstimator
@@ -20,7 +20,7 @@ class JoinOptimizer(sc: SparkContext, rdd: RDD[IntervalWithRow[Int]], rddCount :
 
   private def estimateBroadcastSize(rdd: RDD[IntervalWithRow[Int]], rddCount: Long): Long = {
-    (ObjectSizeCalculator.getObjectSize(rdd.first()) * rddCount) /10
+    (SizeEstimator.estimate(rdd.first()) * rddCount) /10
     //FIXME: Do not know why the size ~10x the actual size is- Spark row representation or getObject size in bits???