From 3e924c83dfe465f682425dd0aa851268719a677c Mon Sep 17 00:00:00 2001 From: samarthbhargav Date: Wed, 11 May 2016 14:19:16 +0530 Subject: [PATCH 1/3] Added gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..eb5a316 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +target From 524430b26b7f27f38cadf9595c0147c9271eb48b Mon Sep 17 00:00:00 2001 From: samarthbhargav Date: Sun, 15 May 2016 11:05:11 +0530 Subject: [PATCH 2/3] Added JsonLoader --- .gitignore | 3 + .../inferlytics/druidlet/loader/Loader.java | 21 ++- .../druidlet/loader/impl/JsonLoader.java | 145 ++++++++++++++++++ .../druidlet/loader/impl/JsonLoaderTest.java | 40 +++++ src/test/resources/testJsonLoader.json | 3 + 5 files changed, 208 insertions(+), 4 deletions(-) create mode 100644 src/main/java/com/inferlytics/druidlet/loader/impl/JsonLoader.java create mode 100644 src/test/java/com/inferlytics/druidlet/loader/impl/JsonLoaderTest.java create mode 100644 src/test/resources/testJsonLoader.json diff --git a/.gitignore b/.gitignore index eb5a316..64af8b7 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,4 @@ target +*.iml +*.iws +*.ipr diff --git a/src/main/java/com/inferlytics/druidlet/loader/Loader.java b/src/main/java/com/inferlytics/druidlet/loader/Loader.java index 1679421..7c4c62f 100644 --- a/src/main/java/com/inferlytics/druidlet/loader/Loader.java +++ b/src/main/java/com/inferlytics/druidlet/loader/Loader.java @@ -8,24 +8,30 @@ package com.inferlytics.druidlet.loader; import com.inferlytics.druidlet.loader.impl.CSVLoader; +import com.inferlytics.druidlet.loader.impl.JsonLoader; import io.druid.data.input.InputRow; import java.io.Reader; import java.util.List; + /** * Abstract class which acts as an interface for loading files onto a QueryableIndex * Must be implemented to support CSV, TSV, JSON, XML, etc. */ -public abstract class Loader implements Iterable { +public abstract class Loader implements Iterable +{ protected List dimensions; protected String timestampDimension; - public Loader(List dims, String ts) { + + public Loader( List dims, String ts ) + { this.dimensions = dims; this.timestampDimension = ts; } + /** * CSVLoader implementation of the Loader * @@ -35,7 +41,14 @@ public Loader(List dims, String ts) { * @param timestampDimension Timestamp dimension * @return A new CSVLoader to the CSV file specified by the reader */ - public static Loader csv(Reader reader, List columns, List dimensions, String timestampDimension) { - return new CSVLoader(reader, columns, dimensions, timestampDimension); + public static Loader csv( Reader reader, List columns, List dimensions, String timestampDimension ) + { + return new CSVLoader( reader, columns, dimensions, timestampDimension ); + } + + + public static Loader json( Reader reader, List dimensions, String timestampDimension ) + { + return new JsonLoader(reader, dimensions, timestampDimension); } } diff --git a/src/main/java/com/inferlytics/druidlet/loader/impl/JsonLoader.java b/src/main/java/com/inferlytics/druidlet/loader/impl/JsonLoader.java new file mode 100644 index 0000000..d265b17 --- /dev/null +++ b/src/main/java/com/inferlytics/druidlet/loader/impl/JsonLoader.java @@ -0,0 +1,145 @@ +package com.inferlytics.druidlet.loader.impl; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.inferlytics.druidlet.loader.Loader; +import io.druid.data.input.InputRow; +import io.druid.data.input.MapBasedInputRow; +import org.joda.time.DateTime; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.Reader; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + + +/** + * Reads JSON files with the given column headers, generates InputRow + * objects with the given dimensions and timestampDimension + * Created by Samarth Bhargav on 5/11/16. + */ +public class JsonLoader extends Loader +{ + private static final TypeReference> TYPE_REFERENCE = new TypeReference>() + { + }; + + private final Reader reader; + + + /** + * Create a new JSON reader + * @param reader the reader to read from + * @param dimensions the list of dimensions + * @param timestampDimension the time stamp dimensions + */ + public JsonLoader( Reader reader, List dimensions, String timestampDimension ) + { + super( dimensions, timestampDimension ); + this.reader = reader; + } + + + @Override public Iterator iterator() + { + return new JsonReaderIterator( this.reader, this.dimensions, this.timestampDimension ); + } + + + /** + * Iterator class + */ + private class JsonReaderIterator implements Iterator, AutoCloseable + { + private final BufferedReader reader; + private final List dimensions; + private final String timestampDimension; + private final ObjectMapper objectMapper; + private String nextLine; + + + /** + * Initialize a new JsonReaderIterator + * @param reader the reader + * @param dimensions the list of dimensions + * @param timestampDimension the time stamp dimension + */ + public JsonReaderIterator( Reader reader, List dimensions, String timestampDimension ) + { + this.reader = new BufferedReader( reader ); + this.dimensions = dimensions; + this.timestampDimension = timestampDimension; + this.objectMapper = new ObjectMapper(); + } + + + /** + * Helper method to get the timestamp + * @param map the map i.e data + * @return the timestamp in the data if it is present, or the default value + */ + private Long getTimestamp( Map map ) + { + if ( timestampDimension == null ) { + return 1L; + } else { + return Long.valueOf( map.get( timestampDimension ).toString() ); + } + } + + + @Override public boolean hasNext() + { + try { + if ( nextLine == null && ( nextLine = reader.readLine() ) == null ) { + close(); + return false; + } else { + return true; + } + } catch ( IOException e ) { + e.printStackTrace(); + try { + close(); + } catch ( IOException e1 ) { + e1.printStackTrace(); + } + return false; + } + } + + + @Override public InputRow next() + { + if ( !hasNext() ) { + return null; + } + Map map = null; + try { + map = this.objectMapper.readValue( nextLine, TYPE_REFERENCE ); + } catch ( IOException e ) { + e.printStackTrace(); + } + nextLine = null; + if ( map == null ) { + return next(); + } + return new MapBasedInputRow( getTimestamp( map ), dimensions, map ); + } + + + @Override public void remove() + { + throw new UnsupportedOperationException(); + } + + + @Override public void close() throws IOException + { + this.reader.close(); + } + } +} diff --git a/src/test/java/com/inferlytics/druidlet/loader/impl/JsonLoaderTest.java b/src/test/java/com/inferlytics/druidlet/loader/impl/JsonLoaderTest.java new file mode 100644 index 0000000..cb2cda3 --- /dev/null +++ b/src/test/java/com/inferlytics/druidlet/loader/impl/JsonLoaderTest.java @@ -0,0 +1,40 @@ +package com.inferlytics.druidlet.loader.impl; + +import io.druid.data.input.InputRow; +import org.testng.annotations.Test; + +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import static org.testng.Assert.*; + + +/** + * Tests for JsonLoader + * Created by Samarth Bhargav on 5/15/16. + */ +public class JsonLoaderTest +{ + @Test public void testJsonLoader() throws Exception + { + JsonLoader jsonLoader = new JsonLoader( + new InputStreamReader( JsonLoaderTest.class.getClassLoader().getResourceAsStream( "testJsonLoader.json" ) ), + Arrays.asList( "dim1", "dim2", "dim3" ), "timestamp" ); + List inputRowList = new ArrayList<>(); + + for ( InputRow inputRow : jsonLoader ) { + inputRowList.add( inputRow ); + } + + assertEquals( 3, inputRowList.size() ); + + for ( int i = 0; i < inputRowList.size(); i++ ) { + InputRow inputRow = inputRowList.get( i ); + for ( String dim : inputRow.getDimensions() ) { + assertEquals( String.valueOf( i + 1 ), inputRow.getDimension( dim ).get( 0 ) ); + } + } + } +} \ No newline at end of file diff --git a/src/test/resources/testJsonLoader.json b/src/test/resources/testJsonLoader.json new file mode 100644 index 0000000..4822713 --- /dev/null +++ b/src/test/resources/testJsonLoader.json @@ -0,0 +1,3 @@ +{"dim1": "1", "dim2": "1", "dim3": "1", "timestamp": 10000} +{"dim1": "2", "dim2": "2", "dim3": "2", "timestamp": 20000} +{"dim1": "3", "dim2": "3", "dim3": "3", "timestamp": 30000} From c51c0d091a8299be1ae0073c2ae6d5ec8e6bd94d Mon Sep 17 00:00:00 2001 From: samarthbhargav Date: Tue, 24 May 2016 15:59:54 +0530 Subject: [PATCH 3/3] Removed gitignore --- .gitignore | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 .gitignore diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 64af8b7..0000000 --- a/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -target -*.iml -*.iws -*.ipr