From d5a85f44df7ec42fd483b1a60c31cf7e8ed40d2f Mon Sep 17 00:00:00 2001
From: Alex Dean <alexander.dean@keplarllp.com>
Date: Sun, 7 Apr 2013 01:55:17 +0100
Subject: [PATCH 1/4] Changed setFileSystem to work with Amazon EMR/S3 paths as
 well

---
 src/main/java/com/m6d/filecrush/crush/Crush.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/main/java/com/m6d/filecrush/crush/Crush.java b/src/main/java/com/m6d/filecrush/crush/Crush.java
index 96f5072..70ffd83 100644
--- a/src/main/java/com/m6d/filecrush/crush/Crush.java
+++ b/src/main/java/com/m6d/filecrush/crush/Crush.java
@@ -574,7 +574,7 @@ public int run(String[] args) throws Exception {
 			return 0;
 		}
 
-		setFileSystem(FileSystem.get(job));
+		setFileSystem(srcDir.getFileSystem(job));
 
 		FileStatus status = fs.getFileStatus(srcDir);
 

From 51737369bf83bb227ad47fb1740ebbc9042d7320 Mon Sep 17 00:00:00 2001
From: Alex Dean <alexander.dean@keplarllp.com>
Date: Sun, 7 Apr 2013 01:55:42 +0100
Subject: [PATCH 2/4] Fixed --input-format and --output-format CLI options in README examples

---
 README | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/README b/README
index 862f0b0..8879e14 100644
--- a/README
+++ b/README
@@ -159,8 +159,8 @@ Now we try an example using the directory options. Say we invoke the crush like
   Crush \
   --regex=.*/(.+) \
   --replacement=$1-${crush.timestamp}-${crush.task.num}-${crush.file.num} \
-  --input=sequence \
-  --output=sequence \
+  --input-format=sequence \
+  --output-format=sequence \
   /user/example/work/input /user/example/work/output 20100221175612
 
 The --regex and --replacement arguments are similar to the arguments passed to String.replaceAll(). The regex argument matches the final part of a directory path. For /user/example/work/input, it will match input. For /user/example/work/input/subdir, it will match subdir. For matching purposes, a directory path does not have a trailing slash. The replacement argument refers to the match group by number to rename the file. The result is:
@@ -179,8 +179,8 @@ The following invocation fails:
   Crush \
   --regex=.*/input \
   --replacement=input-${crush.timestamp}-${crush.task.num}-${crush.file.num} \
-  --input=sequence \ 
-  --output=sequence \
+  --input-format=sequence \
+  --output-format=sequence \
   /user/example/work/input /user/example/work/output 20100221175612
 
 Since we have specified some directory options, we must ensure that all directories in hierarchy rooted at the input argument have a matching regex (since the default regex is no longer applicable). In this invocation, there is no regex argument that matches /user/example/work/input/subdir. We must change it to:
@@ -188,12 +188,12 @@ Since we have specified some directory options, we must ensure that all director
   Crush \
   --regex=.*/input \
   --replacement=input-${crush.timestamp}-${crush.task.num}-${crush.file.num} \
-  --input=sequence \
-  --output=sequence \
+  --input-format=sequence \
+  --output-format=sequence \
   --regex=.*/subdir \
   --replacement=as-text-${crush.timestamp}-${crush.task.num}-${crush.file.num} \
-  --input=sequence \
-  --output=text \
+  --input-format=sequence \
+  --output-format=text \
   /user/example/work/input /user/example/work/output 20100221175612
 
 This will yield:

From f638179f32674dbff7bd61ec0e9726e9ec3bcec3 Mon Sep 17 00:00:00 2001
From: Alex Dean <alexander.dean@keplarllp.com>
Date: Sun, 7 Apr 2013 02:36:27 +0100
Subject: [PATCH 3/4] Version bump

---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index d414b55..31601c9 100644
--- a/pom.xml
+++ b/pom.xml
@@ -4,7 +4,7 @@
 	<groupId>com.m6d</groupId>
 	<artifactId>filecrush</artifactId>
 	<name>M6D App - Filecrush</name>
-	<version>2.2.2-SNAPSHOT</version>
+	<version>2.2.3-SNAPSHOT</version>
 	<description>filecrush utility</description>
 	<packaging>jar</packaging>
 	<properties>

From 78d2bd7df5ea81518da17567746f8b15ea2987e1 Mon Sep 17 00:00:00 2001
From: Alex Dean <alexander.dean@keplarllp.com>
Date: Sun, 7 Apr 2013 02:42:15 +0100
Subject: [PATCH 4/4] Fixed the filesystem lookups in the other files

---
 src/main/java/com/m6d/filecrush/clean/Clean.java            | 6 ++++--
 .../java/com/m6d/filecrush/crush/CountersInputFormat.java   | 2 +-
 src/main/java/com/m6d/filecrush/crush/CrushPartitioner.java | 5 +++--
 src/main/java/com/m6d/filecrush/crush/CrushReducer.java     | 5 +++--
 4 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/src/main/java/com/m6d/filecrush/clean/Clean.java b/src/main/java/com/m6d/filecrush/clean/Clean.java
index d68a518..234dfd9 100644
--- a/src/main/java/com/m6d/filecrush/clean/Clean.java
+++ b/src/main/java/com/m6d/filecrush/clean/Clean.java
@@ -51,8 +51,10 @@ public static void main(String[] args) throws Exception {
 	public int run(String[] args) throws Exception {
         conf = getConf();
        
+        Path targetDir = new Path(conf.get(TARGET_DIR));
+
 		try {
-			fs=FileSystem.get(getConf());
+			fs = targetDir.getFileSystem(conf);
 		} catch (IOException e) {
 			throw new RuntimeException("Could not open filesystem");
 		}
@@ -67,7 +69,7 @@ public int run(String[] args) throws Exception {
 			cutoff=now-targetAge;
 		}
 		
-        return cleanup (new Path(conf.get(TARGET_DIR)));
+        return cleanup(targetDir);
     
 	}
 	
diff --git a/src/main/java/com/m6d/filecrush/crush/CountersInputFormat.java b/src/main/java/com/m6d/filecrush/crush/CountersInputFormat.java
index 8ca5512..7097be7 100644
--- a/src/main/java/com/m6d/filecrush/crush/CountersInputFormat.java
+++ b/src/main/java/com/m6d/filecrush/crush/CountersInputFormat.java
@@ -50,7 +50,7 @@ public RecordReader<Counters, NullWritable> getRecordReader(InputSplit inputSpli
 		Path path = fSplit.getPath();
 		long length = fSplit.getLength();
 
-		FileSystem fs = FileSystem.get(jobconf);
+		FileSystem fs = path.getFileSystem(jobconf);
 
 		FSDataInputStream is = fs.open(path);
 
diff --git a/src/main/java/com/m6d/filecrush/crush/CrushPartitioner.java b/src/main/java/com/m6d/filecrush/crush/CrushPartitioner.java
index a65573f..57aa595 100644
--- a/src/main/java/com/m6d/filecrush/crush/CrushPartitioner.java
+++ b/src/main/java/com/m6d/filecrush/crush/CrushPartitioner.java
@@ -41,9 +41,10 @@ public void configure(JobConf job) {
 		bucketToPartition = new HashMap<Text, Integer>(100);
 
 		try {
-			FileSystem fs = FileSystem.get(job);
+			Path p = new Path(path);
+			FileSystem fs = p.getFileSystem(job);
 
-			Reader reader = new Reader(fs, new Path(path), job);
+			Reader reader = new Reader(fs, p, job);
 
 			Text bucket = new Text();
 			IntWritable partNum = new IntWritable();
diff --git a/src/main/java/com/m6d/filecrush/crush/CrushReducer.java b/src/main/java/com/m6d/filecrush/crush/CrushReducer.java
index 22b2f76..f9cbd97 100644
--- a/src/main/java/com/m6d/filecrush/crush/CrushReducer.java
+++ b/src/main/java/com/m6d/filecrush/crush/CrushReducer.java
@@ -127,7 +127,8 @@ public void configure(JobConf job) {
 		 * The files we write should be rooted in the "crush" subdir of the output directory to distinguish them from the files
 		 * created by the collector.
 		 */
-		outDirPath = new Path(outDirPath + "/crush").toUri().getPath();
+		Path outDirP = new Path(outDirPath + "/crush");
+		outDirPath = outDirP.toUri().getPath();
 
 		/*
 		 * Configure the regular expressions and replacements we use to convert dir names to crush output file names. Also get the
@@ -145,7 +146,7 @@ public void configure(JobConf job) {
 		placeHolderToValue.put("crush.timestamp", job.get("crush.timestamp"));
 
 		try {
-			fs = FileSystem.get(job);
+			fs = outDirP.getFileSystem(job);
 		} catch (RuntimeException e) {
 			throw e;
 		} catch (Exception e) {