Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions components/Python/Generic/dataframe_stats/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

57 changes: 57 additions & 0 deletions components/Python/Generic/dataframe_stats/component.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
{
"version": 1,
"engineType": "Generic",
"language": "Python",
"userStandalone": false,
"name": "dataframe_stats",
"label": "DataFrame Statistics",
"description": "Generate statistics on the input dataframe.",
"program": "dataframe_stats.py",
"componentClass": "MCenterStatsComponentAdapter",
"modelBehavior": "Auxiliary",
"useMLOps": true,
"inputInfo": [
{
"description": "Input Pandas Dataframe",
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In->Input

"label": "dataframe",
"defaultComponent": "",
"type": "dataframe",
"group": "data"
}
],
"outputInfo": [
{
"description": "Output Pandas Dataframe",
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Out->Output

"label": "dataframe",
"defaultComponent": "",
"type": "dataframe",
"group": "data"
}
],
"group": "FeatureEng",
"arguments": [
{
"key": "dataframe_is",
"label": "Dataframe Is",
"description": "What this dataframe represents",
"type": "string",
"uiType": "select",
"options": [
{
"label": "Input Data",
"value": "input_data"
},
{
"label": "Categorical Predictions Probabilities",
"value": "categorical_predictions_probabilities"
},
{
"label": "Other",
"value": "other"
}
],
"defaultValue": "input_data",
"optional": false
}
]
}
44 changes: 44 additions & 0 deletions components/Python/Generic/dataframe_stats/dataframe_stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from __future__ import print_function

from parallelm.components import ConnectableComponent
from parallelm.mlops import mlops as mlops
from parallelm.mlops.stats.bar_graph import BarGraph


class MCenterStatsComponentAdapter(ConnectableComponent):
    """
    Pass-through component that reports statistics on the incoming DataFrame.

    The kind of statistics reported is selected by the ``dataframe_is``
    parameter. The DataFrame itself is always forwarded unchanged.
    """

    def __init__(self, engine):
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this class is subclassed.
        super(MCenterStatsComponentAdapter, self).__init__(engine)

    def _materialize(self, parent_data_objs, user_data):
        """
        Report statistics for the first parent object and pass it through.

        :param parent_data_objs: sequence whose first element is the DataFrame
        :param user_data: unused by this component
        :return: single-element list holding the same DataFrame
        """
        mlops.init()

        df = parent_data_objs[0]
        dataframe_is = self._params.get("dataframe_is", "input_data")

        if dataframe_is == "input_data":
            self._handle_input_data(df)
        elif dataframe_is == "categorical_predictions_probabilities":
            self._handle_categorical_predictions(df)
        elif dataframe_is == "other":
            # Nothing to report for this kind; the data is only forwarded.
            pass
        else:
            # self._logger is a logger object (it is used as `_logger.info`
            # elsewhere), so it must not be called directly.
            # NOTE(review): assumes the standard `error` method is available.
            self._logger.error("Error: argument value is not supported: {}".format(dataframe_is))

        mlops.done()

        return [df]

    def _handle_input_data(self, df):
        """Report the DataFrame through ``mlops.set_data_distribution_stat``."""
        mlops.set_data_distribution_stat(df)

    def _handle_categorical_predictions(self, df):
        """
        Report the share of rows for which each column holds the row maximum.

        :param df: DataFrame with one column per category
        """
        # For every row, the label of the column holding the largest value.
        df_max_col = df.idxmax(axis=1)
        # Fraction of rows won by each column; columns that never win are
        # absent from this Series.
        series_value_count = df_max_col.value_counts(normalize=True)

        # Use a default of 0.0 so a column that is never the maximum does not
        # raise KeyError (as `.at[col]` would).
        col_values = [series_value_count.get(col, 0.0) for col in df.columns]

        bg = BarGraph().name("Categorical Prediction Distribution").cols(list(df.columns)).data(col_values)
        mlops.set_stat(bg)
1 change: 1 addition & 0 deletions components/Python/Generic/file_to_dataframe/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@

38 changes: 38 additions & 0 deletions components/Python/Generic/file_to_dataframe/component.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{
"version": 1,
"engineType": "Generic",
"language": "Python",
"userStandalone": false,
"name": "file_to_dataframe",
"label": "Source File to DataFrame",
"program": "file_to_dataframe.py",
"componentClass": "MCenterComponentAdapter",
"modelBehavior": "Auxiliary",
"useMLOps": true,
"inputInfo": [{
"description": "File to read contents",
"label": "filename",
"defaultComponent": "",
"type": "str",
"group": "data"
}],
"outputInfo": [
{
"description": "Pandas Dataframe",
"label": "dataframe",
"defaultComponent": "",
"type": "dataframe",
"group": "data"
}
],
"group": "Connectors",
"arguments": [
{
"key": "filename",
"label": "Dataset file to read",
"type": "str",
"description": "File to use for loading DataSet into DataFrame",
"optional": true
}
]
}
38 changes: 38 additions & 0 deletions components/Python/Generic/file_to_dataframe/file_to_dataframe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from __future__ import print_function

import sys
import os
import pandas

from parallelm.components import ConnectableComponent


class MCenterComponentAdapter(ConnectableComponent):
    """
    Adapter for read_file_to_df

    Loads a CSV file into a pandas DataFrame. The file path comes from the
    first parent object when one is connected, otherwise from the
    ``filename`` parameter.
    """

    def __init__(self, engine):
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this class is subclassed.
        super(MCenterComponentAdapter, self).__init__(engine)

    def _materialize(self, parent_data_objs, user_data):
        """
        Resolve the file path and return its contents as a DataFrame.

        :param parent_data_objs: optional sequence whose first element is the file path
        :param user_data: unused by this component
        :return: single-element list holding the loaded DataFrame
        :raises Exception: if the resolved path is missing or does not exist
        """
        # Truthiness instead of `len(...) is not 0`: identity comparison with
        # an int literal is not a reliable equality test.
        if parent_data_objs:
            file_path = str(parent_data_objs[0])
        else:
            file_path = self._params.get('filename')

        self._logger.info("file: {}".format(file_path))
        df = self.read_file_to_df(file_path)
        return [df]

    def read_file_to_df(self, filepath):
        """
        Read file and return DataFrame

        :param filepath: path of the CSV file to load
        :return: DataFrame produced by ``pandas.read_csv``
        :raises Exception: if ``filepath`` is empty/None or does not exist
        """

        # `not filepath` covers the case where the optional 'filename'
        # parameter was not supplied, which would otherwise fail inside
        # os.path.exists with an unrelated error.
        if not filepath or not os.path.exists(filepath):
            # Logger methods do not take print()'s `file=` keyword; passing it
            # raised TypeError and hid the exception below.
            # NOTE(review): assumes the standard `error` method is available.
            self._logger.error("stderr- failed to find {}".format(filepath))
            raise Exception("file path does not exist: {}".format(filepath))

        df = pandas.read_csv(filepath)
        return df
Empty file.
30 changes: 30 additions & 0 deletions components/Python/Generic/string-sink/component.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
{
"version": 1,
"engineType": "Generic",
"userStandalone": false,
"language": "Python",
"name": "string-sink",
"label": "Simple String Sink",
"program": "string_sink.py",
"componentClass": "StringSink",
"group": "Sinks",
"useMLOps": true,
"inputInfo": [
{
"description": "String",
"label": "string",
"defaultComponent": "",
"type": "str",
"group": "data"
}],
"outputInfo": [],
"arguments": [
{
"key": "expected-value",
"label": "Expected value to compare to",
"type": "str",
"description": "This is the value expected as input. If no value is provided no check is done",
"optional": true
}
]
}
15 changes: 15 additions & 0 deletions components/Python/Generic/string-sink/string_sink.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from parallelm.components import ConnectableComponent


class StringSink(ConnectableComponent):
    """
    Sink component that optionally checks its input against an expected value.
    """

    def __init__(self, engine):
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this class is subclassed.
        super(StringSink, self).__init__(engine)

    def _materialize(self, parent_data_objs, user_data):
        """
        Compare the first parent object with the 'expected-value' parameter.

        :param parent_data_objs: sequence whose first element is the actual value
        :param user_data: unused by this component
        :return: empty list (this component has no outputs)
        :raises Exception: if an expected value is set and differs from the input
        """
        expected_str_value = self._params.get('expected-value', "")
        actual_value = parent_data_objs[0]

        # Truthiness skips the check for both "" and None; len(None) would
        # raise when the optional parameter is present but unset.
        if expected_str_value and expected_str_value != actual_value:
            raise Exception("Actual [{}] != Expected [{}]".format(actual_value, expected_str_value))
        return []
Empty file.
31 changes: 31 additions & 0 deletions components/Python/Generic/string-source/component.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"version": 1,
"engineType": "Generic",
"userStandalone": false,
"language": "Python",
"name": "string-source",
"label": "Simple String Source",
"program": "string_source.py",
"componentClass": "StringSource",
"group": "Connectors",
"useMLOps": true,
"inputInfo": [],
"outputInfo": [
{
"description": "String",
"label": "string",
"defaultComponent": "",
"type": "str",
"group": "data"
}
],
"arguments": [
{
"key": "value",
"type": "str",
"label": "String value",
"description": "String value to provide as output",
"optional": false
}
]
}
11 changes: 11 additions & 0 deletions components/Python/Generic/string-source/string_source.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from parallelm.components import ConnectableComponent


class StringSource(ConnectableComponent):
    """
    Source component that emits the string configured in the 'value' parameter.
    """

    def __init__(self, engine):
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this class is subclassed.
        super(StringSource, self).__init__(engine)

    def _materialize(self, parent_data_objs, user_data):
        """
        Return the configured string.

        :param parent_data_objs: unused by this component
        :param user_data: unused by this component
        :return: single-element list holding the 'value' parameter, or
                 "default-string-value" when the parameter is absent
        """
        str_value = self._params.get('value', "default-string-value")
        return [str_value]