Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Empty file.
38 changes: 38 additions & 0 deletions components/Python/fileConnectors/file_to_dataframe/component.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{
"engineType": "Python",
"language": "Python",
"userStandalone": false,
"name": "file_to_dataframe",
"label": "Source File to DataFrame",
"program": "main.py",
"componentClass": "MCenterComponentAdapter",
"modelBehavior": "Auxiliary",
"useMLOps": true,
"inputInfo": [{
"description": "File to read contents",
"label": "File-Name",
"defaultComponent": "",
"type": "str",
"group": "data"
}],
"outputInfo": [
{
"description": "Pandas Dataframe",
"label": "dataframe",
"defaultComponent": "",
"type": "dataframe",
"group": "data"
}
],
"group": "Connectors",
"arguments": [
{
"key": "file-path",
"label": "Dataset file to read",
"type": "str",
"description": "File to use for loading DataSet into DataFrame",
"optional": true
}
],
"version": 1
}
48 changes: 48 additions & 0 deletions components/Python/fileConnectors/file_to_dataframe/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
from __future__ import print_function

import argparse
import sys
import time
import os
import pandas

from parallelm.components import ConnectableComponent
from parallelm.mlops.stats.multi_line_graph import MultiLineGraph
from parallelm.mlops import mlops as mlops

class MCenterComponentAdapter(ConnectableComponent):
    """
    Adapter for read_file_to_df

    Picks the file path either from the first upstream data object or from
    the ``file-path`` component argument, and hands it to
    ``read_file_to_df``.
    """

    def __init__(self, engine):
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this adapter is subclassed, because
        # self.__class__ is then the subclass, not this class.
        super(MCenterComponentAdapter, self).__init__(engine)

    def _materialize(self, parent_data_objs, user_data):
        """
        Resolve the file path and return the loaded data.

        :param parent_data_objs: outputs of upstream components; when
            non-empty, the first item (converted with ``str``) is the path.
        :param user_data: not used by this component.
        :return: single-element list holding the result of
            ``read_file_to_df``.
        """
        # Truthiness covers both an empty sequence and None, and avoids
        # comparing an int by identity ("is not 0").
        if parent_data_objs:
            file_path = str(parent_data_objs[0])
        else:
            file_path = self._params.get('file-path')
        return [read_file_to_df(self, file_path)]


def read_file_to_df(self, filepath):
    """
    Read file and return DataFrame

    :param self: component instance; only its ``_logger`` is used here.
    :param filepath: path of the CSV file to load.
    :return: the DataFrame produced by ``pandas.read_csv(filepath)``.
    :raises Exception: if ``filepath`` is ``None`` or does not exist.
    """
    mlops.init()
    try:
        # os.path.exists(None) raises TypeError, so reject a missing path
        # explicitly and report it the same way as a non-existent file.
        if filepath is None or not os.path.exists(filepath):
            # Logger methods do not accept print()'s ``file=`` keyword; passing
            # it raised TypeError and masked the exception below.
            self._logger.error("stderr- failed to find {}".format(filepath))
            raise Exception("file path does not exist: {}".format(filepath))

        return pandas.read_csv(filepath)
    finally:
        # Pair every mlops.init() with mlops.done(), including on failure.
        mlops.done()


def parse_args(argv=None):
    """
    Parse the command-line options of this component.

    :param argv: optional list of argument strings; when ``None`` (the
        default) argparse reads ``sys.argv[1:]``, as before.
    :return: ``argparse.Namespace`` with a ``file_path`` attribute, which
        defaults to ``/tmp/test-data.csv``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--file-path", default='/tmp/test-data.csv', help="Dataset to read")
    return parser.parse_args(argv)