From 633089f5800c3b7472a2d7237daebfc2adf322d8 Mon Sep 17 00:00:00 2001 From: Dhananjoy Das Date: Fri, 1 Mar 2019 16:13:05 -0800 Subject: [PATCH 1/3] [REF-5768] component to Read file into DataFrame --- .../file_to_dataframe/__init__.py | 0 .../file_to_dataframe/component.json | 38 +++++++++++++++ .../fileConnectors/file_to_dataframe/main.py | 47 +++++++++++++++++++ 3 files changed, 85 insertions(+) create mode 100644 components/Python/fileConnectors/file_to_dataframe/__init__.py create mode 100644 components/Python/fileConnectors/file_to_dataframe/component.json create mode 100644 components/Python/fileConnectors/file_to_dataframe/main.py diff --git a/components/Python/fileConnectors/file_to_dataframe/__init__.py b/components/Python/fileConnectors/file_to_dataframe/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/components/Python/fileConnectors/file_to_dataframe/component.json b/components/Python/fileConnectors/file_to_dataframe/component.json new file mode 100644 index 0000000..bf22bdc --- /dev/null +++ b/components/Python/fileConnectors/file_to_dataframe/component.json @@ -0,0 +1,38 @@ +{ + "engineType": "Python", + "language": "Python", + "userStandalone": false, + "name": "file_to_dataframe", + "label": "Source File to DataFrame", + "program": "main.py", + "componentClass": "MCenterComponentAdapter", + "modelBehavior": "Auxiliary", + "useMLOps": true, + "inputInfo": [{ + "description": "File to read contents", + "label": "File-Name", + "defaultComponent": "", + "type": "str", + "group": "data" + }], + "outputInfo": [ + { + "description": "Pandas Dataframe", + "label": "dataframe", + "defaultComponent": "", + "type": "dataframe", + "group": "data" + } + ], + "group": "Connectors", + "arguments": [ + { + "key": "file-path", + "label": "Dataset file to read", + "type": "str", + "description": "File to use for loading DataSet into DataFrame", + "optional": true + } + ], + "version": 1 +} diff --git 
a/components/Python/fileConnectors/file_to_dataframe/main.py b/components/Python/fileConnectors/file_to_dataframe/main.py
new file mode 100644
index 0000000..591d398
--- /dev/null
+++ b/components/Python/fileConnectors/file_to_dataframe/main.py
@@ -0,0 +1,47 @@
+from __future__ import print_function
+
+import argparse
+import sys
+import time
+import os
+import pandas
+
+from parallelm.components import ConnectableComponent
+from parallelm.mlops.stats.multi_line_graph import MultiLineGraph
+from parallelm.mlops import mlops as mlops
+
+class MCenterComponentAdapter(ConnectableComponent):
+    """
+    Adapter for read_file_to_df: loads a CSV file into a pandas DataFrame
+    """
+
+    def __init__(self, engine):
+        super(MCenterComponentAdapter, self).__init__(engine)
+
+    def _materialize(self, parent_data_objs, user_data):
+        # Use the upstream file name when given, else the 'file-path' argument.
+        file_path = parent_data_objs[0] if parent_data_objs else None
+        if file_path is None:
+            file_path = self._params.get('file-path')
+        return [read_file_to_df(file_path)]
+
+
+def read_file_to_df(filepath):
+    """
+    Read the CSV file at filepath and return its contents as a DataFrame.
+    Raises Exception when filepath is empty or does not exist.
+    """
+    mlops.init()
+    if not filepath or not os.path.exists(str(filepath)):
+        print("stderr- failed to find {}".format(filepath), file=sys.stderr)
+        raise Exception("file path does not exist: {}".format(filepath))
+    test_data = pandas.read_csv(str(filepath))
+    mlops.done()
+    return test_data
+
+
+def parse_args():
+    """Parse the component's command-line arguments (--file-path)."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--file-path", default='/tmp/test-data.csv', help="Dataset to read")
+    return parser.parse_args()

From 245dd1513044f98a5b3871e58276fbf691d6ed67 Mon Sep 17 00:00:00 2001
From: Dhananjoy Das
Date: Fri, 1 Mar 2019 16:20:19 -0800
Subject: [PATCH 2/3] [REF-5768] component: Save DataFrame to file

---
 .../dataframe_to_file/__init__.py | 0
 .../dataframe_to_file/component.json | 40 ++++++++++++++++
 .../fileConnectors/dataframe_to_file/main.py | 47 +++++++++++++++++++
 3 files changed, 87 insertions(+)
 create mode 100644 components/Python/fileConnectors/dataframe_to_file/__init__.py
 create mode 100644 
components/Python/fileConnectors/dataframe_to_file/component.json create mode 100644 components/Python/fileConnectors/dataframe_to_file/main.py diff --git a/components/Python/fileConnectors/dataframe_to_file/__init__.py b/components/Python/fileConnectors/dataframe_to_file/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/components/Python/fileConnectors/dataframe_to_file/component.json b/components/Python/fileConnectors/dataframe_to_file/component.json new file mode 100644 index 0000000..a977283 --- /dev/null +++ b/components/Python/fileConnectors/dataframe_to_file/component.json @@ -0,0 +1,40 @@ +{ + "engineType": "Python", + "language": "Python", + "userStandalone": false, + "name": "dataframe_to_file", + "label": "Sink DataFrame to File", + "program": "main.py", + "componentClass": "MCenterComponentAdapter", + "modelBehavior": "Auxiliary", + "useMLOps": true, + "inputInfo": [ + { + "description": "Pandas DataFrame", + "label": "dataframe", + "defaultComponent": "", + "type": "dataframe", + "group": "data" + } + ], + "outputInfo": [ + { + "description": "File name", + "label": "filename", + "defaultComponent": "", + "type": "str", + "group": "data" + } + ], + "group": "Sinks", + "arguments": [ + { + "key": "file-path", + "label": "save to file", + "type": "str", + "description": "Save DataFrame to file", + "optional": true + } + ], + "version": 1 +} diff --git a/components/Python/fileConnectors/dataframe_to_file/main.py b/components/Python/fileConnectors/dataframe_to_file/main.py new file mode 100644 index 0000000..b0bc22e --- /dev/null +++ b/components/Python/fileConnectors/dataframe_to_file/main.py @@ -0,0 +1,47 @@ +from __future__ import print_function + +import argparse +import sys +import time +import os +import pandas + +from parallelm.components import ConnectableComponent +from parallelm.mlops.stats.multi_line_graph import MultiLineGraph +from parallelm.mlops import mlops as mlops + +class MCenterComponentAdapter(ConnectableComponent): 
+    """
+    Adapter for df_to_file: writes the incoming DataFrame to a CSV file
+    """
+
+    def __init__(self, engine):
+        super(MCenterComponentAdapter, self).__init__(engine)
+
+    def _materialize(self, parent_data_objs, user_data):
+        df_results = parent_data_objs[0]
+        results_path = self._params.get('file_path')
+        return [df_to_file(df_results, results_path)]
+
+
+def df_to_file(df_predict_results, filepath):
+    """
+    Save a DataFrame as CSV to "<filepath>.<unix time>" and return that path.
+    An empty or missing filepath falls back to '/tmp/results', the --file-path default.
+    """
+    mlops.init()
+    # Runs started in different seconds get different suffixes, hence different files.
+    suffix_time_stamp = str(int(time.time()))
+    save_file = (filepath or '/tmp/results') + '.' + suffix_time_stamp
+    # to_csv() opens and closes the path itself, so no separate file handle is held.
+    pandas.DataFrame(df_predict_results).to_csv(save_file)
+    mlops.done()
+    return save_file
+
+
+def parse_args():
+    """Parse the component's command-line arguments (--file-path)."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--file-path", default='/tmp/results', help="Save DataFrame to file")
+    options = parser.parse_args()
+    return options

From 44b71bef0273ed07608d8a69042649a2e1eed6db Mon Sep 17 00:00:00 2001
From: Dhananjoy Das
Date: Fri, 1 Mar 2019 18:29:46 -0800
Subject: [PATCH 3/3] [REF-576] minor: fix file-name input string

---
 components/Python/fileConnectors/dataframe_to_file/main.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/components/Python/fileConnectors/dataframe_to_file/main.py b/components/Python/fileConnectors/dataframe_to_file/main.py
index b0bc22e..3bde499 100644
--- a/components/Python/fileConnectors/dataframe_to_file/main.py
+++ b/components/Python/fileConnectors/dataframe_to_file/main.py
@@ -20,7 +20,7 @@ def __init__(self, engine):
 
     def _materialize(self, parent_data_objs, user_data):
         df_results = parent_data_objs[0]
-        results_path = self._params.get('file_path')
+        results_path = self._params.get('file-path')
         return [df_to_file(df_results, results_path)]
 
 