Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,6 @@
[submodule "lightsaber_bench/LightSaber"]
path = lightsaber_bench/LightSaber
url = https://github.com/wzhao18/LightSaber.git
[submodule "dataset_util/protobuf"]
path = dataset_util/protobuf
url = https://github.com/protocolbuffers/protobuf
56 changes: 56 additions & 0 deletions dataset_util/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#### Dependencies and usage:

Install Boost Library:

```
git clone https://github.com/boostorg/boost.git && \
cd boost && \
git checkout boost-1.78.0 && \
git submodule init && \
git submodule update && \
./bootstrap.sh --prefix=/usr/local && \
./b2 install --prefix=/usr/local
```

Install Protobuf Library:

```
git clone https://github.com/protocolbuffers/protobuf.git && \
cd protobuf && \
git checkout v3.19.4 && \
git submodule init && \
git submodule update && \
cd cmake && \
mkdir build && \
cd build && \
cmake -DCMAKE_INSTALL_PREFIX=/usr/local .. && \
make -j$(nproc) && \
make install
```

Install Protobuf Java Runtime (protobuf-java-3.19.4.jar):

```
cd protobuf/java && \
mvn test && \
mvn package
```

Load Dataset with C++:

Build with Cmake directly. Protobuf files will be generated by cmake function `protobuf_generate`

Load Dataset with Java:

```
protoc -I /path/to/protos --java_out=. data.proto
javac -cp /path/to/protobuf-java-3.19.4.jar:. DataLoader.java
./dataset-parser | java -cp /path/to/protobuf-java-3.19.4.jar:. DataLoader
```

Load Dataset with C#:

```
protoc -I /path/to/protos --csharp_out=. data.proto
./dataset-parser | dotnet run
```
65 changes: 65 additions & 0 deletions dataset_util/include/csv_parser.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#ifndef DATASET_UTIL_CSV_PARSER_H_
#define DATASET_UTIL_CSV_PARSER_H_

#include <fstream>

#include <data_parser.h>

using namespace std;

class csv_parser : public data_parser
{
protected:
typedef vector<string> csv_row;
virtual void decode(csv_row&, stream::stream_event*) = 0;

bool parse_csv_line(fstream &file, csv_row &row)
{
string line;

if (getline(file, line)) {
string word;
stringstream ss(line);

while (getline(ss, word, ',')) {
row.push_back(word);
}

return true;
}
return false;
}

void parse_csv_file(string file_name)
{
cerr << "Begin parsing " << file_name << endl;

std::fstream csv_file(file_name);
string line;
getline(csv_file, line);
long line_cnt = 0;

csv_row row;
while (true) {
if (!parse_csv_line(csv_file, row)) {
break;
}

stream::stream_event data;
decode(row, &data);
if (!write_serialized_to_ostream(data)) {
break;
}
row.clear();
line_cnt++;
}

csv_file.close();
cerr << "Parsing finished. Number of data points: " << line_cnt << endl;
}

public:
csv_parser(){}
};

#endif // DATASET_UTIL_CSV_PARSER_H_
38 changes: 38 additions & 0 deletions dataset_util/include/data_loader.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#ifndef DATASET_UTIL_DATA_LOADER_H_
#define DATASET_UTIL_DATA_LOADER_H_

#include <iostream>

#include <google/protobuf/io/coded_stream.h>
#include <google/protobuf/io/zero_copy_stream_impl.h>
#include <google/protobuf/util/delimited_message_util.h>

#include <stream_event.pb.h>

using namespace std;

class data_loader
{
private:
google::protobuf::io::IstreamInputStream raw_in;
google::protobuf::io::CodedInputStream coded_in;
public:
data_loader() :
raw_in(&cin),
coded_in(&raw_in)
{}
~data_loader(){}

bool load_data(stream::stream_event& event) {
bool clean_eof;
if (!google::protobuf::util::ParseDelimitedFromCodedStream(&event, &coded_in, &clean_eof)) {
if (!clean_eof) {
cerr << "Fail to parse data from coded input stream." << endl;
}
return false;
}
return true;
}
};

#endif // DATASET_UTIL_DATA_LOADER_H_
28 changes: 28 additions & 0 deletions dataset_util/include/data_parser.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#ifndef DATASET_UTIL_DATA_PARSER_H_
#define DATASET_UTIL_DATA_PARSER_H_

#include <google/protobuf/util/delimited_message_util.h>

#include <stream_event.pb.h>

using namespace std;

class data_parser
{
protected:
virtual bool parse() = 0;

bool write_serialized_to_ostream(stream::stream_event &t)
{
if (!google::protobuf::util::SerializeDelimitedToOstream(t, &cout)) {
cerr << "Fail to serialize data into output stream" << endl;
return false;
}
return true;
}

public:
data_parser(){}
};

#endif // DATASET_UTIL_DATA_PARSER_H_
68 changes: 68 additions & 0 deletions dataset_util/include/data_printer.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#ifndef DATASET_UTIL_DATA_PRINTER_H_
#define DATASET_UTIL_DATA_PRINTER_H_

#include <iostream>

#include <stream_event.pb.h>

using namespace std;

ostream& operator<< (ostream& out, stream::vibration const& vibration)
{
out << "vibration: ";
out << "channel_1: " << vibration.channel_1() << ", ";
out << "channel_2: " << vibration.channel_2();
return out;
}

ostream& operator<< (ostream& out, stream::taxi_trip const& trip)
{
out << "taxi_trip: ";
out << "medallion: " << trip.medallion() << ", ";
out << "hack_license: " << trip.hack_license() << ", ";
out << "vendor_id: " << trip.vendor_id() << ", ";
out << "rate_code: " << trip.rate_code() << ", ";
out << "store_and_fwd_flag: " << trip.store_and_fwd_flag() << ", ";
out << "passenger_count: " << trip.passenger_count() << ", ";
out << "trip_time_in_secs: " << trip.trip_time_in_secs() << ", ";
out << "trip_distance: " << trip.trip_distance() << ", ";
out << "pickup_longitude: " << trip.pickup_longitude() << ", ";
out << "pickup_latitude: " << trip.pickup_latitude() << ", ";
out << "dropoff_longitude: " << trip.dropoff_longitude() << ", ";
out << "dropoff_latitude: " << trip.dropoff_latitude();
return out;
}

ostream& operator<< (ostream& out, stream::taxi_fare const& fare)
{
out << "taxi_fare: ";
out << "medallion: " << fare.medallion() << ", ";
out << "hack_license: " << fare.hack_license() << ", ";
out << "vendor_id: " << fare.vendor_id() << ", ";
out << "payment_type: " << fare.payment_type() << ", ";
out << "fare_amount: " << fare.fare_amount() << ", ";
out << "surcharge: " << fare.surcharge() << ", ";
out << "mta_tax: " << fare.mta_tax() << ", ";
out << "tip_amount: " << fare.tip_amount() << ", ";
out << "tolls_amount: " << fare.tolls_amount() << ", ";
out << "total_amount: " << fare.total_amount();
return out;
}

ostream& operator<< (ostream& out, stream::stream_event const& event)
{
out << "(Partition: " << event.part_key() << ") ";
out << "Event: [" << event.st() << ", " << event.et() << "]: ";
if (event.has_taxi_trip()) {
out << event.taxi_trip();
} else if (event.has_taxi_fare()) {
out << event.taxi_fare();
} else if (event.has_vibration()) {
out << event.vibration();
} else {
out << "Unknown payload";
}
return out;
}

#endif // DATASET_UTIL_DATA_PRINTER_H_
Loading