From 7285b486ddcd9ac59dafa14ff9e26c8174df7699 Mon Sep 17 00:00:00 2001 From: Angad Singh Date: Sun, 29 Jun 2014 15:31:17 +0530 Subject: [PATCH] Set tmpjars for hadoop to be able to find hraven-core and other required libs --- .../java/com/twitter/hraven/Constants.java | 4 +++ .../com/twitter/hraven/etl/HadoopUtil.java | 27 +++++++++++++++++++ .../twitter/hraven/etl/JobFileProcessor.java | 3 +++ .../twitter/hraven/etl/JobFileRawLoader.java | 3 +++ 4 files changed, 37 insertions(+) create mode 100644 hraven-etl/src/main/java/com/twitter/hraven/etl/HadoopUtil.java diff --git a/hraven-core/src/main/java/com/twitter/hraven/Constants.java b/hraven-core/src/main/java/com/twitter/hraven/Constants.java index 9e8653a..7c8b712 100644 --- a/hraven-core/src/main/java/com/twitter/hraven/Constants.java +++ b/hraven-core/src/main/java/com/twitter/hraven/Constants.java @@ -426,4 +426,8 @@ public class Constants { /** name of the properties file used for cluster to cluster identifier mapping */ public static final String HRAVEN_CLUSTER_PROPERTIES_FILENAME = "hRavenClusters.properties"; + + public static final String HRAVEN_HDFS_LIB_PATH_CONF = "hraven.conf.libpath"; + + public static final String HADOOP_TMP_JARS_CONF = "tmpjars"; } diff --git a/hraven-etl/src/main/java/com/twitter/hraven/etl/HadoopUtil.java b/hraven-etl/src/main/java/com/twitter/hraven/etl/HadoopUtil.java new file mode 100644 index 0000000..23f13e4 --- /dev/null +++ b/hraven-etl/src/main/java/com/twitter/hraven/etl/HadoopUtil.java @@ -0,0 +1,27 @@ +package com.twitter.hraven.etl; + +import java.io.IOException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import com.twitter.hraven.Constants; +/** Utility class holding a static helper that fills the Hadoop tmpjars setting from the contents of a library directory. */ +public class HadoopUtil { +/** Lists the directory named by the configuration value stored under {@code libPathConf} and stores the comma-separated paths of its entries under {@link Constants#HADOOP_TMP_JARS_CONF}. Does nothing when that configuration value is unset or when the listing is null. @param libPathConf name of the configuration key whose value is the library directory path @param conf configuration that is read and then modified in place @throws IOException if obtaining the file system or listing the directory fails */ public static void setTmpJars(String libPathConf, Configuration conf) throws IOException { +/* Accumulates the comma-separated list of entry paths. */ StringBuilder tmpjars = new StringBuilder(); + if (conf.get(libPathConf) != null) { 
+/* The file system is obtained from FileSystem.get(conf), not resolved from the library path itself. */ FileSystem fs = FileSystem.get(conf); + FileStatus[] files = fs.listStatus(new Path(conf.get(libPathConf))); + if (files != null) { +/* Every listed entry is appended as-is; there is no filter on entry type or file name. */ for (FileStatus file : files) { +/* Emit a comma separator before every entry except the first. */ if (!tmpjars.toString().isEmpty()) tmpjars = tmpjars.append(","); + tmpjars = tmpjars.append(file.getPath()); + } +/* Runs whenever the listing is non-null, so an empty listing stores an empty string. The key is written with set, so this value is not merged with anything already stored under it. */ conf.set(Constants.HADOOP_TMP_JARS_CONF, tmpjars.toString()); + } + } + } +} diff --git a/hraven-etl/src/main/java/com/twitter/hraven/etl/JobFileProcessor.java b/hraven-etl/src/main/java/com/twitter/hraven/etl/JobFileProcessor.java index 6ace3d9..e230470 100644 --- a/hraven-etl/src/main/java/com/twitter/hraven/etl/JobFileProcessor.java +++ b/hraven-etl/src/main/java/com/twitter/hraven/etl/JobFileProcessor.java @@ -603,6 +603,9 @@ private Job getProcessingJob(Configuration conf, Scan scan, int totalJobCount) // Note: must be BEFORE the job construction with the new mapreduce API. confClone.setBoolean("mapred.map.tasks.speculative.execution", false); + //Set tmpjars for hadoop to be able to find hraven-core and other required libs + HadoopUtil.setTmpJars(Constants.HRAVEN_HDFS_LIB_PATH_CONF, confClone); + // Set up job Job job = new Job(confClone, getJobName(totalJobCount)); diff --git a/hraven-etl/src/main/java/com/twitter/hraven/etl/JobFileRawLoader.java b/hraven-etl/src/main/java/com/twitter/hraven/etl/JobFileRawLoader.java index 19b4c87..36c2509 100644 --- a/hraven-etl/src/main/java/com/twitter/hraven/etl/JobFileRawLoader.java +++ b/hraven-etl/src/main/java/com/twitter/hraven/etl/JobFileRawLoader.java @@ -282,6 +282,9 @@ private boolean runRawLoaderJob(Configuration myHBaseConf, String input, // Note: must be BEFORE the job construction with the new mapreduce API. 
myHBaseConf.setBoolean("mapred.map.tasks.speculative.execution", false); + // Set tmpjars for hadoop to be able to find hraven-core and other required libs + HadoopUtil.setTmpJars(Constants.HRAVEN_HDFS_LIB_PATH_CONF, myHBaseConf); + // Set up job Job job = new Job(myHBaseConf, getJobName(totalJobCount)); job.setJarByClass(JobFileRawLoader.class);