diff --git a/components/Python/fileConnectors/file_to_dataframe/__init__.py b/components/Python/fileConnectors/file_to_dataframe/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/components/Python/fileConnectors/file_to_dataframe/component.json b/components/Python/fileConnectors/file_to_dataframe/component.json
new file mode 100644
index 0000000..bf22bdc
--- /dev/null
+++ b/components/Python/fileConnectors/file_to_dataframe/component.json
@@ -0,0 +1,38 @@
+{
+  "engineType": "Python",
+  "language": "Python",
+  "userStandalone": false,
+  "name": "file_to_dataframe",
+  "label": "Source File to DataFrame",
+  "program": "main.py",
+  "componentClass": "MCenterComponentAdapter",
+  "modelBehavior": "Auxiliary",
+  "useMLOps": true,
+  "inputInfo": [{
+    "description": "File to read contents",
+    "label": "File-Name",
+    "defaultComponent": "",
+    "type": "str",
+    "group": "data"
+  }],
+  "outputInfo": [
+    {
+      "description": "Pandas Dataframe",
+      "label": "dataframe",
+      "defaultComponent": "",
+      "type": "dataframe",
+      "group": "data"
+    }
+  ],
+  "group": "Connectors",
+  "arguments": [
+    {
+      "key": "file-path",
+      "label": "Dataset file to read",
+      "type": "str",
+      "description": "File to use for loading DataSet into DataFrame",
+      "optional": true
+    }
+  ],
+  "version": 1
+}
diff --git a/components/Python/fileConnectors/file_to_dataframe/main.py b/components/Python/fileConnectors/file_to_dataframe/main.py
new file mode 100644
index 0000000..385ff52
--- /dev/null
+++ b/components/Python/fileConnectors/file_to_dataframe/main.py
@@ -0,0 +1,81 @@
+from __future__ import print_function
+
+import argparse
+import sys
+import time
+import os
+import pandas
+
+from parallelm.components import ConnectableComponent
+from parallelm.mlops.stats.multi_line_graph import MultiLineGraph
+from parallelm.mlops import mlops as mlops
+
+
+class MCenterComponentAdapter(ConnectableComponent):
+    """
+    Adapter that exposes read_file_to_df as a connectable component.
+
+    The file to load is taken from the first parent output when one is
+    connected, otherwise from the 'file-path' component argument.
+    """
+
+    def __init__(self, engine):
+        # Name the class explicitly: super(self.__class__, self) recurses
+        # forever as soon as this adapter is subclassed.
+        super(MCenterComponentAdapter, self).__init__(engine)
+
+    def _materialize(self, parent_data_objs, user_data):
+        """
+        Load the selected file and return it as a one-element output list.
+
+        :param parent_data_objs: outputs of upstream components; when
+            non-empty, the first entry is used as the file path
+        :param user_data: not used by this component
+        :return: list holding the DataFrame read from the file
+        """
+        # Truthiness instead of `len(...) is not 0`: identity comparison
+        # against an int literal only works by interpreter accident.
+        if parent_data_objs:
+            file_path = str(parent_data_objs[0])
+        else:
+            file_path = self._params.get('file-path')
+        return [read_file_to_df(self, file_path)]
+
+
+def read_file_to_df(self, filepath):
+    """
+    Read the CSV file at filepath and return it as a pandas DataFrame.
+
+    :param self: calling component; only its _logger is used
+    :param filepath: path of the CSV file to load
+    :return: DataFrame produced by pandas.read_csv
+    :raises Exception: if filepath is empty/None or does not exist
+    """
+    # Validate before opening the mlops session so a bad path cannot leave
+    # an init() without its matching done(). The emptiness check also covers
+    # the optional 'file-path' argument being unset (None).
+    if not filepath or not os.path.exists(filepath):
+        # `file=` is a print() keyword; a standard logging.Logger rejects it
+        # with TypeError, which would mask the intended error raised below.
+        self._logger.info("stderr- failed to find {}".format(filepath))
+        raise Exception("file path does not exist: {}".format(filepath))
+
+    mlops.init()
+    try:
+        return pandas.read_csv(filepath)
+    finally:
+        # Always pair init() with done(), even when parsing fails.
+        mlops.done()
+
+
+def parse_args():
+    """
+    Parse the --file-path command-line option.
+
+    NOTE(review): nothing in this module calls this function.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--file-path", default='/tmp/test-data.csv', help="Dataset to read")
+    options = parser.parse_args()
+    return options
+