From 84154e0a6377136072ff10f6e965bca061450cc1 Mon Sep 17 00:00:00 2001 From: David Majercak Date: Fri, 14 Oct 2022 17:11:35 -0700 Subject: [PATCH 01/20] WIP --- src/pipelines/azureml/lightgbm_inferencing.py | 11 + .../data_processing/generate_data/generate.py | 3 +- .../inferencing/lightgbm_ort/.amlignore | 4 + .../lightgbm_ort/default.dockerfile | 27 +++ src/scripts/inferencing/lightgbm_ort/score.py | 227 ++++++++++++++++++ .../lightgbm_ort/spec.additional_includes | 2 + .../inferencing/lightgbm_ort/spec.yaml | 54 +++++ 7 files changed, 327 insertions(+), 1 deletion(-) create mode 100644 src/scripts/inferencing/lightgbm_ort/.amlignore create mode 100644 src/scripts/inferencing/lightgbm_ort/default.dockerfile create mode 100644 src/scripts/inferencing/lightgbm_ort/score.py create mode 100644 src/scripts/inferencing/lightgbm_ort/spec.additional_includes create mode 100644 src/scripts/inferencing/lightgbm_ort/spec.yaml diff --git a/src/pipelines/azureml/lightgbm_inferencing.py b/src/pipelines/azureml/lightgbm_inferencing.py index b82a9690..f9cfc171 100644 --- a/src/pipelines/azureml/lightgbm_inferencing.py +++ b/src/pipelines/azureml/lightgbm_inferencing.py @@ -67,6 +67,7 @@ class lightgbm_inferencing_config: # pylint: disable=invalid-name lightgbm_python_score_module = Component.from_yaml(yaml_file=os.path.join(COMPONENTS_ROOT, "inferencing", "lightgbm_python", "spec.yaml")) lightgbm_c_api_score_module = Component.from_yaml(yaml_file=os.path.join(COMPONENTS_ROOT, "inferencing", "lightgbm_c_api", "spec.yaml")) lightgbm_ray_score_module = Component.from_yaml(yaml_file=os.path.join(COMPONENTS_ROOT, "inferencing", "lightgbm_ray", "spec.yaml")) +lightgbm_ort_score_module = Component.from_yaml(yaml_file=os.path.join(COMPONENTS_ROOT, "inferencing", "lightgbm_ort", "spec.yaml")) custom_win_cli_score_module = Component.from_yaml(yaml_file=os.path.join(COMPONENTS_ROOT, "inferencing", "custom_win_cli", "spec.yaml")) treelite_compile_module = Component.from_yaml(yaml_file=os.path.join(COMPONENTS_ROOT, "model_transformation", "treelite_compile", "spec.yaml")) treelite_score_module = Component.from_yaml(yaml_file=os.path.join(COMPONENTS_ROOT, "inferencing", "treelite_python", "spec.yaml")) @@ -176,6 +177,16 @@ def inferencing_task_pipeline_function(benchmark_custom_properties, ) inferencing_step.runsettings.configure(target=config.compute.linux_cpu) + elif variant.framework == "lightgbm_ort": + # call module with all the right arguments + inferencing_step = lightgbm_ort_score_module( + data = data, + model = model, + verbose = False, + custom_properties = custom_properties + ) + inferencing_step.runsettings.configure(target=config.compute.linux_cpu) + else: raise NotImplementedError(f"framework {variant.framework} not implemented (yet)") diff --git a/src/scripts/data_processing/generate_data/generate.py b/src/scripts/data_processing/generate_data/generate.py index ba5445d7..0f1a9163 100644 --- a/src/scripts/data_processing/generate_data/generate.py +++ b/src/scripts/data_processing/generate_data/generate.py @@ -252,7 +252,8 @@ def run(self, args, logger, metrics_logger, unknown_args): os.makedirs(args.output_train, exist_ok=True) os.makedirs(args.output_test, exist_ok=True) os.makedirs(args.output_inference, exist_ok=True) - os.makedirs(args.external_header, exist_ok=True) + if args.external_header: + os.makedirs(args.external_header, exist_ok=True) # transform delimiter diff --git a/src/scripts/inferencing/lightgbm_ort/.amlignore b/src/scripts/inferencing/lightgbm_ort/.amlignore new file mode 100644 
index 00000000..749ccdaf --- /dev/null +++ b/src/scripts/inferencing/lightgbm_ort/.amlignore @@ -0,0 +1,4 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class diff --git a/src/scripts/inferencing/lightgbm_ort/default.dockerfile b/src/scripts/inferencing/lightgbm_ort/default.dockerfile new file mode 100644 index 00000000..2b807f43 --- /dev/null +++ b/src/scripts/inferencing/lightgbm_ort/default.dockerfile @@ -0,0 +1,27 @@ +FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest +LABEL lightgbmbenchmark.linux.cpu.ray.version="0.1.2/20220111.1" + +ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm + +# Create conda environment +RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ + python=3.8 pip=20.2.4 + +# Prepend path to AzureML conda environment +ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH + +# Install pip dependencies +RUN HOROVOD_WITH_TENSORFLOW=1 \ + pip install 'pandas>=1.1,<1.2' \ + 'numpy>=1.10,<1.20' \ + 'matplotlib==3.4.3' \ + 'scipy~=1.5.0' \ + 'scikit-learn~=0.24.1' \ + 'azureml-core==1.35.0' \ + 'azureml-defaults==1.35.0' \ + 'azureml-mlflow==1.35.0' \ + 'azureml-telemetry==1.35.0' \ + 'mpi4py==3.1.1' \ + 'onnxruntime==1.12.1' \ + 'onnxmltools==1.11.1' \ + 'onnxconverter-common==1.12.2' diff --git a/src/scripts/inferencing/lightgbm_ort/score.py b/src/scripts/inferencing/lightgbm_ort/score.py new file mode 100644 index 00000000..23f00c7a --- /dev/null +++ b/src/scripts/inferencing/lightgbm_ort/score.py @@ -0,0 +1,227 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +""" +LightGBM/Python inferencing script +""" +import os +import sys +import argparse +import logging +import time +import timeit +import numpy as np +from distutils.util import strtobool +import lightgbm + +import onnxruntime as ort +from onnxmltools.convert import convert_lightgbm +from onnxconverter_common.data_types import FloatTensorType + +COMMON_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..")) + +if COMMON_ROOT not in sys.path: + logging.info(f"Adding {COMMON_ROOT} to PYTHONPATH") + sys.path.append(str(COMMON_ROOT)) + +# useful imports from common +from common.components import RunnableScript +from common.io import input_file_path + + +class LightGBMONNXRTInferecingScript(RunnableScript): + def __init__(self): + super().__init__( + task="score", + framework="lightgbm", + framework_version="ONNXRT." + str(ort.__version__), + ) + + @classmethod + def get_arg_parser(cls, parser=None): + """Adds component/module arguments to a given argument parser. 
+ + Args: + parser (argparse.ArgumentParser): an argument parser instance + + Returns: + ArgumentParser: the argument parser instance + + Notes: + if parser is None, creates a new parser instance + """ + # add generic arguments + parser = RunnableScript.get_arg_parser(parser) + + group_i = parser.add_argument_group(f"Input Data [{__name__}:{cls.__name__}]") + group_i.add_argument( + "--data", + required=True, + type=input_file_path, + help="Inferencing data location (file path)", + ) + group_i.add_argument( + "--data_format", + type=str, + choices=["CSV", "PARQUET", "PETASTORM"], + default="CSV", + ) + group_i.add_argument( + "--model", + required=False, + type=input_file_path, + help="Exported model location (file path)", + ) + group_i.add_argument( + "--output", + required=False, + default=None, + type=str, + help="Inferencing output location (file path)", + ) + + group_params = parser.add_argument_group( + f"Scoring parameters [{__name__}:{cls.__name__}]" + ) + group_params.add_argument( + "--num_threads", + required=False, + default=1, + type=int, + help="number of threads", + ) + group_params.add_argument( + "--predict_disable_shape_check", + required=False, + default=False, + type=strtobool, + help="See LightGBM documentation", + ) + + return parser + + def run(self, args, logger, metrics_logger, unknown_args): + """Run script with arguments (the core of the component) + + Args: + args (argparse.namespace): command line arguments provided to script + logger (logging.getLogger() for this script) + metrics_logger (common.metrics.MetricLogger) + unknown_args (list[str]): list of arguments not recognized during argparse + """ + # record relevant parameters + metrics_logger.log_parameters(num_threads=args.num_threads) + + # register logger for lightgbm logs + lightgbm.register_logger(logger) + + # make sure the output argument exists + if args.output: + os.makedirs(args.output, exist_ok=True) + args.output = os.path.join(args.output, "predictions.txt") + + logger.info(f"Loading model from {args.model}") + booster = lightgbm.Booster(model_file=args.model) + + logger.info(f"Loading data for inferencing") + assert args.data_format == "CSV" + with metrics_logger.log_time_block("time_data_loading"): + # NOTE: this is bad, but allows for libsvm format (not just numpy) + inference_data = lightgbm.Dataset( + args.data, free_raw_data=False + ).construct() + inference_raw_data = inference_data.get_data() + if type(inference_raw_data) == str: + inference_raw_data = np.loadtxt( + inference_raw_data, delimiter="," + ).astype(np.float32)[:, : inference_data.num_feature()] + + logger.info(f"Converting model to ONNX") + onnx_input_types = [ + ( + "input", + FloatTensorType( + [inference_data.num_data(), inference_data.num_feature()] + ), + ) + ] + onnx_ml_model = convert_lightgbm(booster, initial_types=onnx_input_types) + + logger.info(f"Creating inference session") + sessionml = ort.InferenceSession(onnx_ml_model.SerializeToString(), sess) + + # capture data shape as property + metrics_logger.set_properties( + inference_data_length=inference_data.num_data(), + inference_data_width=inference_data.num_feature(), + ) + + logger.info(f"Running .predict()") + + batch_start_time = time.monotonic() + sessionml.run( + [sessionml.get_outputs()[0].name], + {sessionml.get_inputs()[0].name: inference_raw_data}, + ) + + # onnxml_time = timeit.timeit( + # "sessionml.run( [sessionml.get_outputs()[0].name], {sessionml.get_inputs()[0].name: inference_raw_data} )", + # number=10, + # setup="from __main__ import 
sessionml, inference_raw_data", + # ) + onnxml_time = timeit.timeit( + lambda: sessionml.run( + [sessionml.get_outputs()[0].name], + {sessionml.get_inputs()[0].name: inference_raw_data}, + ), + number=10, + ) + print( + "LGBM->ONNXML (CPU): {}".format( + num_classes, max_depth, n_estimators, onnxml_time + ) + ) + + booster.num_trees + batch_start_time = time.monotonic() + predictions_array = booster.predict( + data=inference_raw_data, + num_threads=args.num_threads, + predict_disable_shape_check=bool(args.predict_disable_shape_check), + ) + prediction_time = time.monotonic() - batch_start_time + metrics_logger.log_metric("time_inferencing", prediction_time) + + # use helper to log latency with the right metric names + metrics_logger.log_inferencing_latencies( + [prediction_time], # only one big batch + batch_length=inference_data.num_data(), + factor_to_usecs=1000000.0, # values are in seconds + ) + + if args.output: + np.savetxt( + args.output, + predictions_array, + fmt="%f", + delimiter=",", + newline="\n", + header="", + footer="", + comments="# ", + encoding=None, + ) + + +def get_arg_parser(parser=None): + """To ensure compatibility with shrike unit tests""" + return LightGBMONNXRTInferecingScript.get_arg_parser(parser) + + +def main(cli_args=None): + """To ensure compatibility with shrike unit tests""" + LightGBMONNXRTInferecingScript.main(cli_args) + + +if __name__ == "__main__": + main() diff --git a/src/scripts/inferencing/lightgbm_ort/spec.additional_includes b/src/scripts/inferencing/lightgbm_ort/spec.additional_includes new file mode 100644 index 00000000..13e7552d --- /dev/null +++ b/src/scripts/inferencing/lightgbm_ort/spec.additional_includes @@ -0,0 +1,2 @@ +../../../common/ +../../../../docker/ diff --git a/src/scripts/inferencing/lightgbm_ort/spec.yaml b/src/scripts/inferencing/lightgbm_ort/spec.yaml new file mode 100644 index 00000000..b0ca1315 --- /dev/null +++ b/src/scripts/inferencing/lightgbm_ort/spec.yaml @@ -0,0 +1,54 @@ +$schema: http://azureml/sdk-2-0/CommandComponent.json +name: lightgbm_ort_score +version: 0.0.1 +display_name: "LightGBM Inferencing (ONNX RT)" +type: CommandComponent +description: "LightGBM inferencing using the ONNX Runtime." 
+is_deterministic: true +inputs: + data: + type: AnyDirectory + description: directory to the inference data + optional: false + model: + type: AnyDirectory + description: directory to the model + optional: false + predict_disable_shape_check: + type: Boolean + description: "control whether or not LightGBM raises an error when you try to predict on data with a different number of features than the training data" + default: False + n_threads: + type: Integer + optional: true + verbose: + type: Boolean + default: False + custom_properties: + type: String + description: additional custom tags for the job + optional: true + +outputs: + predictions: + type: AnyDirectory + +command: >- + python score.py + --data {inputs.data} + --model {inputs.model} + --output {outputs.predictions} + [--num_threads {inputs.n_threads}] + --predict_disable_shape_check {inputs.predict_disable_shape_check} + --verbose {inputs.verbose} + [--custom_properties {inputs.custom_properties}] + --cluster_auto_setup True + +environment: + docker: + build: + # file path is resolved after additional includes + dockerfile: file:./default.dockerfile + conda: + userManagedDependencies: true + os: Linux From b449733bd71ebb15edf6a2136bfe43157d457369 Mon Sep 17 00:00:00 2001 From: David Majercak Date: Mon, 17 Oct 2022 16:59:01 -0700 Subject: [PATCH 02/20] add onnx inference --- .../benchmarks/lightgbm-inferencing.yaml | 4 ++ conf/experiments/lightgbm-inferencing.yaml | 4 ++ .../lightgbm_ort/default.dockerfile | 6 +++ src/scripts/inferencing/lightgbm_ort/score.py | 40 +++++-------------- 4 files changed, 25 insertions(+), 29 deletions(-) diff --git a/conf/experiments/benchmarks/lightgbm-inferencing.yaml b/conf/experiments/benchmarks/lightgbm-inferencing.yaml index 188b7b0c..13874a6e 100644 --- a/conf/experiments/benchmarks/lightgbm-inferencing.yaml +++ b/conf/experiments/benchmarks/lightgbm-inferencing.yaml @@ -14,6 +14,9 @@ experiment: name: "lightgbm_inferencing_dev" description: "something interesting to say about this" +run: + submit: true + lightgbm_inferencing_config: # name of your particular benchmark benchmark_name: "benchmark-inferencing-20211216.1" # need to be provided at runtime! 
@@ -80,6 +83,7 @@ lightgbm_inferencing_config: - framework: lightgbm_c_api # v3.2.1 with C API prediction build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile - framework: lightgbm_ray # ray implementation + - framework: lightgbm_ort # ONNX RT implementation - framework: treelite_python # v1.3.0 # to use custom_win_cli, you need to compile your own binaries diff --git a/conf/experiments/lightgbm-inferencing.yaml b/conf/experiments/lightgbm-inferencing.yaml index 2e68c4d3..f70d2d22 100644 --- a/conf/experiments/lightgbm-inferencing.yaml +++ b/conf/experiments/lightgbm-inferencing.yaml @@ -14,6 +14,9 @@ experiment: name: "lightgbm_inferencing_dev" description: "something interesting to say about this" +run: + submit: true + lightgbm_inferencing_config: # name of your particular benchmark benchmark_name: "benchmark-dev" # override this with a unique name @@ -36,6 +39,7 @@ lightgbm_inferencing_config: - framework: lightgbm_c_api # v3.2.1 with C API prediction build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile - framework: lightgbm_ray # ray implementation + - framework: lightgbm_ort # ONNX RT implementation - framework: treelite_python # v1.3.0 # to use custom_win_cli, you need to compile your own binaries diff --git a/src/scripts/inferencing/lightgbm_ort/default.dockerfile b/src/scripts/inferencing/lightgbm_ort/default.dockerfile index 2b807f43..01e5614c 100644 --- a/src/scripts/inferencing/lightgbm_ort/default.dockerfile +++ b/src/scripts/inferencing/lightgbm_ort/default.dockerfile @@ -3,6 +3,8 @@ LABEL lightgbmbenchmark.linux.cpu.ray.version="0.1.2/20220111.1" ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm +ARG lightgbm_version="3.3.0" + # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ python=3.8 pip=20.2.4 @@ -25,3 +27,7 @@ RUN HOROVOD_WITH_TENSORFLOW=1 \ 'onnxruntime==1.12.1' \ 'onnxmltools==1.11.1' \ 'onnxconverter-common==1.12.2' + +# install lightgbm with mpi +RUN pip install lightgbm==${lightgbm_version} \ + pip install 'protobuf==3.20' \ No newline at end of file diff --git a/src/scripts/inferencing/lightgbm_ort/score.py b/src/scripts/inferencing/lightgbm_ort/score.py index 23f00c7a..371fec02 100644 --- a/src/scripts/inferencing/lightgbm_ort/score.py +++ b/src/scripts/inferencing/lightgbm_ort/score.py @@ -148,7 +148,7 @@ def run(self, args, logger, metrics_logger, unknown_args): onnx_ml_model = convert_lightgbm(booster, initial_types=onnx_input_types) logger.info(f"Creating inference session") - sessionml = ort.InferenceSession(onnx_ml_model.SerializeToString(), sess) + sessionml = ort.InferenceSession(onnx_ml_model.SerializeToString()) # capture data shape as property metrics_logger.set_properties( @@ -157,45 +157,27 @@ def run(self, args, logger, metrics_logger, unknown_args): ) logger.info(f"Running .predict()") - batch_start_time = time.monotonic() - sessionml.run( + predictions_array = sessionml.run( [sessionml.get_outputs()[0].name], {sessionml.get_inputs()[0].name: inference_raw_data}, - ) + )[0] + prediction_time = time.monotonic() - batch_start_time + metrics_logger.log_metric("time_inferencing", prediction_time) + # TODO: Discuss alternative? 
# onnxml_time = timeit.timeit( - # "sessionml.run( [sessionml.get_outputs()[0].name], {sessionml.get_inputs()[0].name: inference_raw_data} )", + # lambda: sessionml.run( + # [sessionml.get_outputs()[0].name], + # {sessionml.get_inputs()[0].name: inference_raw_data}, + # ), # number=10, - # setup="from __main__ import sessionml, inference_raw_data", # ) - onnxml_time = timeit.timeit( - lambda: sessionml.run( - [sessionml.get_outputs()[0].name], - {sessionml.get_inputs()[0].name: inference_raw_data}, - ), - number=10, - ) - print( - "LGBM->ONNXML (CPU): {}".format( - num_classes, max_depth, n_estimators, onnxml_time - ) - ) - - booster.num_trees - batch_start_time = time.monotonic() - predictions_array = booster.predict( - data=inference_raw_data, - num_threads=args.num_threads, - predict_disable_shape_check=bool(args.predict_disable_shape_check), - ) - prediction_time = time.monotonic() - batch_start_time - metrics_logger.log_metric("time_inferencing", prediction_time) # use helper to log latency with the right metric names metrics_logger.log_inferencing_latencies( [prediction_time], # only one big batch - batch_length=inference_data.num_data(), + batch_length=len(inference_raw_data), factor_to_usecs=1000000.0, # values are in seconds ) From ee19ce581fbf6a612f81b204cfb4c09b7da2b459 Mon Sep 17 00:00:00 2001 From: David Majercak Date: Mon, 17 Oct 2022 18:10:37 -0700 Subject: [PATCH 03/20] wip --- src/scripts/inferencing/lightgbm_ort/score.py | 51 +++++++++++++++---- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/src/scripts/inferencing/lightgbm_ort/score.py b/src/scripts/inferencing/lightgbm_ort/score.py index 371fec02..c4a4a1e4 100644 --- a/src/scripts/inferencing/lightgbm_ort/score.py +++ b/src/scripts/inferencing/lightgbm_ort/score.py @@ -90,6 +90,13 @@ def get_arg_parser(cls, parser=None): type=int, help="number of threads", ) + group_params.add_argument( + "--run_parallel", + required=False, + default=True, + type=bool, + help="number of threads", + ) group_params.add_argument( "--predict_disable_shape_check", required=False, @@ -140,15 +147,25 @@ def run(self, args, logger, metrics_logger, unknown_args): onnx_input_types = [ ( "input", - FloatTensorType( - [inference_data.num_data(), inference_data.num_feature()] - ), + FloatTensorType([1, inference_data.num_feature()]), ) ] onnx_ml_model = convert_lightgbm(booster, initial_types=onnx_input_types) logger.info(f"Creating inference session") - sessionml = ort.InferenceSession(onnx_ml_model.SerializeToString()) + sess_options = ort.SessionOptions() + # sess_options.intra_op_num_threads = args.num_threads + sess_options.execution_mode = ( + ort.ExecutionMode.ORT_PARALLEL + if args.run_parallel + else ort.ExecutionMode.ORT_SEQUENTIAL + ) + sess_options.graph_optimization_level = ( + ort.GraphOptimizationLevel.ORT_ENABLE_ALL + ) + sessionml = ort.InferenceSession( + onnx_ml_model.SerializeToString(), sess_options + ) # capture data shape as property metrics_logger.set_properties( @@ -156,14 +173,26 @@ def run(self, args, logger, metrics_logger, unknown_args): inference_data_width=inference_data.num_feature(), ) + # Warmup and compute results + for _ in range(100): + predictions_arraysessionml.run( + [sessionml.get_outputs()[0].name], + {sessionml.get_inputs()[0].name: inference_raw_data[0:1]}, + )[0] + logger.info(f"Running .predict()") - batch_start_time = time.monotonic() - predictions_array = sessionml.run( - [sessionml.get_outputs()[0].name], - {sessionml.get_inputs()[0].name: inference_raw_data}, - )[0] - prediction_time 
= time.monotonic() - batch_start_time - metrics_logger.log_metric("time_inferencing", prediction_time) + time_inferencing_per_query = [] + predictions_array = [] + for i in range(len(inference_raw_data)): + batch_start_time = time.monotonic() + prediction = sessionml.run( + [sessionml.get_outputs()[0].name], + {sessionml.get_inputs()[0].name: inference_raw_data[i : i + 1]}, + )[0] + prediction_time = time.monotonic() - batch_start_time + time_inferencing_per_query.append(prediction_time) + predictions_array.append(prediction) + metrics_logger.log_metric("time_inferencing", sum(prediction_time)) # TODO: Discuss alternative? # onnxml_time = timeit.timeit( From b0ea07c398619f8a13dba18e95113032a7fbaaee Mon Sep 17 00:00:00 2001 From: David Majercak Date: Tue, 18 Oct 2022 11:28:38 -0700 Subject: [PATCH 04/20] update onnx measurements technique --- .../lightgbm-inferencing-prod.yaml | 51 +++++++++++++ src/scripts/inferencing/lightgbm_ort/score.py | 76 +++++++++++++------ 2 files changed, 105 insertions(+), 22 deletions(-) create mode 100644 conf/experiments/lightgbm-inferencing-prod.yaml diff --git a/conf/experiments/lightgbm-inferencing-prod.yaml b/conf/experiments/lightgbm-inferencing-prod.yaml new file mode 100644 index 00000000..d2898f2d --- /dev/null +++ b/conf/experiments/lightgbm-inferencing-prod.yaml @@ -0,0 +1,51 @@ +# This experiment runs multiple variants of lightgbm inferencing + treelite +# on a given user-defined dataset and model +# +# to execute: +# > python src/pipelines/azureml/lightgbm_inferencing.py --exp-config conf/experiments/lightgbm-inferencing.yaml + +defaults: + - aml: custom_prod + - compute: custom_prod + +### CUSTOM PARAMETERS ### + +experiment: + name: "lightgbm_inferencing_prod" + description: "something interesting to say about this" + +run: + submit: true + +lightgbm_inferencing_config: + # name of your particular benchmark + benchmark_name: "benchmark-prod" # override this with a unique name + + # list all the data/model pairs to run inferencing with + tasks: + - data: + name: "NiR4_OFE_FR_NOFF_DATA" + model: + name: "NiR4_OFE_LGBM" + - data: + name: "NiR4_HRS_FR_NOFF_DATA" + model: + name: "NiR4_HRS_LGBM" + + # list all inferencing frameworks and their builds + variants: + # - framework: lightgbm_python # v3.3.0 via pypi + # - framework: lightgbm_c_api # v3.3.0 with C API prediction + # - framework: lightgbm_c_api # v3.3.0 with C API prediction + # build: docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile + # - framework: lightgbm_c_api # v3.2.1 with C API prediction + # build: docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile + # - framework: lightgbm_c_api # v3.2.1 with C API prediction + # build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile + # - framework: lightgbm_ray # ray implementation + - framework: lightgbm_ort # ONNX RT implementation + - framework: treelite_python # v1.3.0 + + # to use custom_win_cli, you need to compile your own binaries + # see src/scripts/inferencing/custom_win_cli/static_binaries/README.md + #- framework: custom_win_cli diff --git a/src/scripts/inferencing/lightgbm_ort/score.py b/src/scripts/inferencing/lightgbm_ort/score.py index c4a4a1e4..b67a8af2 100644 --- a/src/scripts/inferencing/lightgbm_ort/score.py +++ b/src/scripts/inferencing/lightgbm_ort/score.py @@ -128,7 +128,11 @@ def run(self, args, logger, metrics_logger, unknown_args): args.output = os.path.join(args.output, "predictions.txt") logger.info(f"Loading model from {args.model}") - booster = lightgbm.Booster(model_file=args.model) + # BUG: 
https://github.com/onnx/onnxmltools/issues/338 + with open(args.model, "r") as mf: + model_str = mf.read() + model_str = model_str.replace("objective=lambdarank", "objective=regression") + booster = lightgbm.Booster(model_str=model_str) logger.info(f"Loading data for inferencing") assert args.data_format == "CSV" @@ -150,7 +154,18 @@ def run(self, args, logger, metrics_logger, unknown_args): FloatTensorType([1, inference_data.num_feature()]), ) ] + onnx_batch_input_types = [ + ( + "input", + FloatTensorType( + [inference_data.num_data(), inference_data.num_feature()] + ), + ) + ] onnx_ml_model = convert_lightgbm(booster, initial_types=onnx_input_types) + onnx_ml_batch_model = convert_lightgbm( + booster, initial_types=onnx_batch_input_types + ) logger.info(f"Creating inference session") sess_options = ort.SessionOptions() @@ -166,6 +181,9 @@ def run(self, args, logger, metrics_logger, unknown_args): sessionml = ort.InferenceSession( onnx_ml_model.SerializeToString(), sess_options ) + sessionml_batch = ort.InferenceSession( + onnx_ml_batch_model.SerializeToString(), sess_options + ) # capture data shape as property metrics_logger.set_properties( @@ -173,39 +191,53 @@ def run(self, args, logger, metrics_logger, unknown_args): inference_data_width=inference_data.num_feature(), ) + logger.info(f"Running .predict()") + # Warmup and compute results for _ in range(100): - predictions_arraysessionml.run( + sessionml.run( [sessionml.get_outputs()[0].name], {sessionml.get_inputs()[0].name: inference_raw_data[0:1]}, )[0] + predictions_array = sessionml_batch.run( + [sessionml.get_outputs()[0].name], + {sessionml.get_inputs()[0].name: inference_raw_data}, + )[0] - logger.info(f"Running .predict()") time_inferencing_per_query = [] - predictions_array = [] - for i in range(len(inference_raw_data)): - batch_start_time = time.monotonic() - prediction = sessionml.run( + + timeit_loops = 10 + onnxml_batch_time = timeit.timeit( + lambda: sessionml_batch.run( [sessionml.get_outputs()[0].name], - {sessionml.get_inputs()[0].name: inference_raw_data[i : i + 1]}, - )[0] - prediction_time = time.monotonic() - batch_start_time + {sessionml.get_inputs()[0].name: inference_raw_data}, + ), + number=timeit_loops, + ) + onnxml_batch_time /= timeit_loops + + metrics_logger.log_metric("time_inferencing_batch", onnxml_batch_time) + + for i in range(len(inference_raw_data)): + prediction_time = timeit.timeit( + lambda: sessionml.run( + [sessionml.get_outputs()[0].name], + {sessionml.get_inputs()[0].name: inference_raw_data[i : i + 1]}, + ), + number=timeit_loops, + ) + prediction_time /= timeit_loops time_inferencing_per_query.append(prediction_time) - predictions_array.append(prediction) - metrics_logger.log_metric("time_inferencing", sum(prediction_time)) - - # TODO: Discuss alternative? 
- # onnxml_time = timeit.timeit( - # lambda: sessionml.run( - # [sessionml.get_outputs()[0].name], - # {sessionml.get_inputs()[0].name: inference_raw_data}, - # ), - # number=10, - # ) + metrics_logger.log_metric("time_inferencing", sum(time_inferencing_per_query)) # use helper to log latency with the right metric names metrics_logger.log_inferencing_latencies( - [prediction_time], # only one big batch + [onnxml_batch_time], # only one big batch + batch_length=len(inference_raw_data), + factor_to_usecs=1000000.0, # values are in seconds + ) + metrics_logger.log_inferencing_latencies( + time_inferencing_per_query, # only one big batch batch_length=len(inference_raw_data), factor_to_usecs=1000000.0, # values are in seconds ) From 611da1b271f45911ea4354749720d1f4913b15a6 Mon Sep 17 00:00:00 2001 From: David Majercak Date: Tue, 18 Oct 2022 16:42:52 -0700 Subject: [PATCH 05/20] wip --- .../benchmarks/lightgbm-inferencing.yaml | 3 +- requirements.txt | 42 ++++++++++--------- src/pipelines/azureml/lightgbm_inferencing.py | 11 +++++ src/scripts/inferencing/lightgbm_ort/score.py | 34 ++++++--------- .../inferencing/lightgbm_ort/spec.yaml | 6 ++- 5 files changed, 54 insertions(+), 42 deletions(-) diff --git a/conf/experiments/benchmarks/lightgbm-inferencing.yaml b/conf/experiments/benchmarks/lightgbm-inferencing.yaml index 13874a6e..18bd852e 100644 --- a/conf/experiments/benchmarks/lightgbm-inferencing.yaml +++ b/conf/experiments/benchmarks/lightgbm-inferencing.yaml @@ -82,8 +82,9 @@ lightgbm_inferencing_config: build: docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile - framework: lightgbm_c_api # v3.2.1 with C API prediction build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile - - framework: lightgbm_ray # ray implementation + # - framework: lightgbm_ray # ray implementation - framework: lightgbm_ort # ONNX RT implementation + - framework: lightgbm_ort_multithread # ONNX RT multithreaded implementation - framework: treelite_python # v1.3.0 # to use custom_win_cli, you need to compile your own binaries diff --git a/requirements.txt b/requirements.txt index 9b4b581f..226c6181 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,28 +1,32 @@ # benchmark common code -mlflow==1.23.1 -omegaconf~=2.1 -mpi4py==3.1.1 -matplotlib==3.4.3 -psutil==5.8.0 +mlflow==1.29.0 +omegaconf==2.1.2 +mpi4py==3.1.3 +matplotlib==3.6.1 +psutil==5.9.3 # frameworks -ray==1.9.2 -lightgbm-ray==0.1.2 -lightgbm==3.3.1 -treelite==2.1.0 -treelite_runtime==2.1.0 -flaml==0.9.6 +ray==2.0.0 +lightgbm-ray==0.1.7 +lightgbm==3.3.3 +treelite==3.0.0 +treelite-runtime==3.0.0 +FLAML==1.0.13 hpbandster==0.7.4 -ConfigSpace==0.5.0 -optuna==2.8.0 +ConfigSpace==0.6.0 +optuna==3.0.3 # pipelines -shrike[pipeline]==1.14.7 -azure-ml-component==0.9.4.post1 # for component dsl -azureml-train-core==1.36.0 # for azureml.train.hyperdrive -azureml-dataset-runtime==1.36.0 # to register dataset -hydra-core~=1.0.3 -typing_extensions==4.0.1 # for hydra +shrike[pipeline]==1.31.10 +azure-core==1.26.0 +azure-storage-blob==12.13.0 +azure-ml-component==0.9.13.post1 +azureml-train-core==1.37.0 +azureml-dataset-runtime==1.46.0 +hydra-core==1.0.4 +typing_extensions==4.4.0 +azureml-mlflow==1.46.0 +mlflow-skinny==1.29.0 # unit testing pytest==6.2.4 diff --git a/src/pipelines/azureml/lightgbm_inferencing.py b/src/pipelines/azureml/lightgbm_inferencing.py index f9cfc171..4a81fe43 100644 --- a/src/pipelines/azureml/lightgbm_inferencing.py +++ b/src/pipelines/azureml/lightgbm_inferencing.py @@ -187,6 +187,17 @@ def 
inferencing_task_pipeline_function(benchmark_custom_properties, ) inferencing_step.runsettings.configure(target=config.compute.linux_cpu) + elif variant.framework == "lightgbm_ort_multithread": + # call module with all the right arguments + inferencing_step = lightgbm_ort_score_module( + data = data, + model = model, + verbose = False, + run_parallel = True, + custom_properties = custom_properties + ) + inferencing_step.runsettings.configure(target=config.compute.linux_cpu) + else: raise NotImplementedError(f"framework {variant.framework} not implemented (yet)") diff --git a/src/scripts/inferencing/lightgbm_ort/score.py b/src/scripts/inferencing/lightgbm_ort/score.py index b67a8af2..5ee427e9 100644 --- a/src/scripts/inferencing/lightgbm_ort/score.py +++ b/src/scripts/inferencing/lightgbm_ort/score.py @@ -86,14 +86,14 @@ def get_arg_parser(cls, parser=None): group_params.add_argument( "--num_threads", required=False, - default=1, + default=0, type=int, help="number of threads", ) group_params.add_argument( "--run_parallel", required=False, - default=True, + default=False, type=bool, help="number of threads", ) @@ -131,7 +131,9 @@ def run(self, args, logger, metrics_logger, unknown_args): # BUG: https://github.com/onnx/onnxmltools/issues/338 with open(args.model, "r") as mf: model_str = mf.read() - model_str = model_str.replace("objective=lambdarank", "objective=regression") + model_str = model_str.replace( + "objective=lambdarank", "objective=regression" + ) booster = lightgbm.Booster(model_str=model_str) logger.info(f"Loading data for inferencing") @@ -169,7 +171,14 @@ def run(self, args, logger, metrics_logger, unknown_args): logger.info(f"Creating inference session") sess_options = ort.SessionOptions() - # sess_options.intra_op_num_threads = args.num_threads + + if args.num_threads > 0: + logger.info(f"Setting number of threads to {args.num_threads}") + sess_options.intra_op_num_threads = args.num_threads + + if args.run_parallel: + logger.info(f"Creating multithreaded inference session") + sess_options.execution_mode = ( ort.ExecutionMode.ORT_PARALLEL if args.run_parallel @@ -205,19 +214,7 @@ def run(self, args, logger, metrics_logger, unknown_args): )[0] time_inferencing_per_query = [] - timeit_loops = 10 - onnxml_batch_time = timeit.timeit( - lambda: sessionml_batch.run( - [sessionml.get_outputs()[0].name], - {sessionml.get_inputs()[0].name: inference_raw_data}, - ), - number=timeit_loops, - ) - onnxml_batch_time /= timeit_loops - - metrics_logger.log_metric("time_inferencing_batch", onnxml_batch_time) - for i in range(len(inference_raw_data)): prediction_time = timeit.timeit( lambda: sessionml.run( @@ -231,11 +228,6 @@ def run(self, args, logger, metrics_logger, unknown_args): metrics_logger.log_metric("time_inferencing", sum(time_inferencing_per_query)) # use helper to log latency with the right metric names - metrics_logger.log_inferencing_latencies( - [onnxml_batch_time], # only one big batch - batch_length=len(inference_raw_data), - factor_to_usecs=1000000.0, # values are in seconds - ) metrics_logger.log_inferencing_latencies( time_inferencing_per_query, # only one big batch batch_length=len(inference_raw_data), diff --git a/src/scripts/inferencing/lightgbm_ort/spec.yaml b/src/scripts/inferencing/lightgbm_ort/spec.yaml index b0ca1315..f97e94ce 100644 --- a/src/scripts/inferencing/lightgbm_ort/spec.yaml +++ b/src/scripts/inferencing/lightgbm_ort/spec.yaml @@ -1,6 +1,6 @@ $schema: http://azureml/sdk-2-0/CommandComponent.json name: lightgbm_ort_score -version: 0.0.1 +version: 0.0.2 
display_name: "LightGBM Inferencing (ONNX RT)" type: CommandComponent description: "LightGBM inferencing using the ONNX Runtime." @@ -21,6 +21,9 @@ inputs: n_threads: type: Integer optional: true + run_parallel: + type: Boolean + optional: true verbose: type: Boolean default: False @@ -39,6 +42,7 @@ command: >- --model {inputs.model} --output {outputs.predictions} [--num_threads {inputs.n_threads}] + [--run_parallel {inputs.run_parallel}] --predict_disable_shape_check {inputs.predict_disable_shape_check} --verbose {inputs.verbose} [--custom_properties {inputs.custom_properties}] From 37227f6b072ce5bd980da8f5b4f9929481a0885a Mon Sep 17 00:00:00 2001 From: David Majercak Date: Wed, 19 Oct 2022 14:30:44 -0700 Subject: [PATCH 06/20] update docs with results for onnx --- .../benchmarks/lightgbm-inferencing.yaml | 2 + docs/results/inferencing.md | 178 +++++++++++------- requirements.txt | 3 + src/pipelines/azureml/lightgbm_inferencing.py | 23 +++ src/scripts/analysis/analyze.py | 74 +++++--- src/scripts/inferencing/lightgbm_ort/score.py | 50 ++++- .../inferencing/lightgbm_ort/spec.yaml | 6 +- 7 files changed, 230 insertions(+), 106 deletions(-) diff --git a/conf/experiments/benchmarks/lightgbm-inferencing.yaml b/conf/experiments/benchmarks/lightgbm-inferencing.yaml index 18bd852e..262964d8 100644 --- a/conf/experiments/benchmarks/lightgbm-inferencing.yaml +++ b/conf/experiments/benchmarks/lightgbm-inferencing.yaml @@ -84,7 +84,9 @@ lightgbm_inferencing_config: build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile # - framework: lightgbm_ray # ray implementation - framework: lightgbm_ort # ONNX RT implementation + - framework: lightgbm_ort_batch # ONNX RT implementation - framework: lightgbm_ort_multithread # ONNX RT multithreaded implementation + - framework: lightgbm_ort_multithread_batch # ONNX RT multithreaded implementation - framework: treelite_python # v1.3.0 # to use custom_win_cli, you need to compile your own binaries diff --git a/docs/results/inferencing.md b/docs/results/inferencing.md index 8395ab49..b9f6242d 100644 --- a/docs/results/inferencing.md +++ b/docs/results/inferencing.md @@ -8,31 +8,35 @@ ## Variants -| variant_id | index | framework | version | build | cpu count | num threads | machine | system | -|:------------------|--------:|:----------------|:-----------------|:-----------------------------------------------------------|------------:|--------------:|:----------|:---------| -| lightgbm#0 | 0 | lightgbm | PYTHON_API.3.3.0 | default | 16 | 1 | x86_64 | Linux | -| lightgbm#1 | 1 | lightgbm | C_API.3.3.0 | default | 16 | 1 | x86_64 | Linux | -| lightgbm#2 | 2 | lightgbm | C_API.3.3.0 | docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile | 16 | 1 | x86_64 | Linux | -| lightgbm#3 | 3 | lightgbm | C_API.3.2.1 | docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile | 16 | 1 | x86_64 | Linux | -| lightgbm#4 | 4 | lightgbm | C_API.3.2.1 | docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile | 16 | 1 | x86_64 | Linux | -| treelite_python#5 | 5 | treelite_python | 1.3.0 | default | 16 | 1 | x86_64 | Linux | +| variant_id | index | framework | version | build | cpu count | num threads | machine | system | +|:---------------------------|--------:|:-------------------------|:-----------------|:-----------------------------------------------------------|------------:|--------------:|:----------|:---------| +| lightgbm#0 | 0 | lightgbm | PYTHON_API.3.3.0 | default | 16 | 1 | x86_64 | Linux | +| lightgbm#1 | 1 | lightgbm | C_API.3.3.0 | default | 16 | 1 | 
x86_64 | Linux | +| lightgbm#2 | 2 | lightgbm | C_API.3.3.0 | docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile | 16 | 1 | x86_64 | Linux | +| lightgbm#3 | 3 | lightgbm | C_API.3.2.1 | docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile | 16 | 1 | x86_64 | Linux | +| lightgbm#4 | 4 | lightgbm | C_API.3.2.1 | docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile | 16 | 1 | x86_64 | Linux | +| onnx#5 | 5 | onnx | ONNXRT.1.12.1 | default | 16 | 1 | x86_64 | Linux | +| onnx_batch#6 | 6 | onnx_batch | ONNXRT.1.12.1 | default | 16 | 1 | x86_64 | Linux | +| onnx_multithreaded#7 | 7 | onnx_multithreaded | ONNXRT.1.12.1 | default | 16 | - | x86_64 | Linux | +| onnx_multithreaded_batch#8 | 8 | onnx_multithreaded_batch | ONNXRT.1.12.1 | default | 16 | - | x86_64 | Linux | +| treelite_python#9 | 9 | treelite_python | 2.1.0 | default | 16 | 1 | x86_64 | Linux | ## Metric time_inferencing per prediction (usecs) -| inferencing task config | lightgbm#0 | lightgbm#1 | lightgbm#2 | lightgbm#3 | lightgbm#4 | treelite_python#5 | -|:---------------------------------------|-------------:|-------------:|-------------:|-------------:|-------------:|--------------------:| -| 10 trees
31 leaves<br/>10 cols | 6.71442 | 1.27191 | 1.88084 | 1.97014 | 1.50457 | 0.299835 |
-| 10 trees<br/>31 leaves<br/>100 cols | 10.0109 | 1.87281 | 1.89273 | 1.51227 | 1.93901 | 0.465536 |
-| 10 trees<br/>31 leaves<br/>1000 cols | 37.308 | 4.32708 | 4.70362 | 7.06888 | 4.72284 | 2.08173 |
-| 100 trees<br/>31 leaves<br/>10 cols | 18.8272 | 12.7087 | 14.9646 | 10.8278 | 16.6011 | 5.27241 |
-| 100 trees<br/>31 leaves<br/>100 cols | 23.524 | 9.6317 | 11.2825 | 15.0675 | 13.3228 | 7.3904 |
-| 100 trees<br/>31 leaves<br/>1000 cols | 45.8476 | 14.3042 | 18.5159 | 15.6538 | 14.9914 | 7.93605 |
-| 1000 trees<br/>31 leaves<br/>10 cols | 113.854 | 95.4644 | 104.575 | 93.1975 | 107.137 | 28.5369 |
-| 1000 trees<br/>31 leaves<br/>100 cols | 173.506 | 136.601 | 137.953 | 137.349 | 165.446 | 96.1941 |
-| 1000 trees<br/>31 leaves<br/>1000 cols | 178.49 | 143.14 | 143.734 | 146.814 | 149.186 | 98.9669 |
-| 5000 trees<br/>31 leaves<br/>10 cols | 395.046 | 394.296 | 425.493 | 326.193 | 443.607 | 251.199 |
-| 5000 trees<br/>31 leaves<br/>100 cols | 467.79 | 459.998 | 535.714 | 537.431 | 450.346 | 295.183 |
-| 5000 trees<br/>31 leaves
1000 cols | 645.185 | 580.791 | 574.005 | 643.234 | 591.006 | 442.544 | +| inferencing task config | lightgbm#0 | lightgbm#1 | lightgbm#2 | lightgbm#3 | lightgbm#4 | onnx#5 | onnx_batch#6 | onnx_multithreaded#7 | onnx_multithreaded_batch#8 | treelite_python#9 | +|:---------------------------------------|-------------:|-------------:|-------------:|-------------:|-------------:|----------:|---------------:|-----------------------:|-----------------------------:|--------------------:| +| 10 trees
31 leaves<br/>10 cols | 6.95305 | 1.11553 | 1.19408 | 1.15504 | 1.12653 | 7.62398 | 0.0969134 | 21.4563 | 0.198045 | 0.303221 |
+| 10 trees<br/>31 leaves<br/>100 cols | 9.9608 | 1.57071 | 1.81644 | 1.55628 | 1.73756 | 7.67336 | 0.149622 | 22.5913 | 0.303975 | 0.449347 |
+| 10 trees<br/>31 leaves<br/>1000 cols | 36.8206 | 3.97296 | 4.00286 | 4.35525 | 4.56862 | 7.65319 | 1.23701 | 21.9663 | 1.03079 | 1.90513 |
+| 100 trees<br/>31 leaves<br/>10 cols | 16.081 | 10.3246 | 11.2351 | 10.4623 | 10.411 | 12.9457 | 0.489068 | 27.8963 | 0.518232 | 5.12872 |
+| 100 trees<br/>31 leaves<br/>100 cols | 18.419 | 10.2733 | 9.27452 | 10.6115 | 10.4095 | 13.1084 | 0.691856 | 26.6879 | 0.637577 | 5.73254 |
+| 100 trees<br/>31 leaves<br/>1000 cols | 45.0129 | 12.6701 | 11.4707 | 12.7013 | 12.794 | 11.9506 | 2.29946 | 28.9509 | 1.98307 | 7.35011 |
+| 1000 trees<br/>31 leaves<br/>10 cols | 97.3209 | 97.622 | 103.892 | 95.7561 | 97.6808 | 18.3931 | 3.95854 | 40.0455 | 4.24206 | 33.3337 |
+| 1000 trees<br/>31 leaves<br/>100 cols | 154.284 | 146.32 | 154.788 | 149.401 | 149.942 | 20.4271 | 5.15573 | 40.3441 | 4.93979 | 96.6871 |
+| 1000 trees<br/>31 leaves<br/>1000 cols | 165.235 | 140.012 | 150.223 | 143.748 | 141.769 | 20.1743 | 11.7819 | 36.897 | 12.1277 | 101.73 |
+| 5000 trees<br/>31 leaves<br/>10 cols | 376.015 | 407.244 | 373.407 | 366.11 | 383.453 | 43.7589 | 10.8586 | 85.8648 | 10.1721 | 219.653 |
+| 5000 trees<br/>31 leaves<br/>100 cols | 421.179 | 465.234 | 482.583 | 468.308 | 473.928 | 104.56 | 24.15 | 156.015 | 24.2661 | 300.779 |
+| 5000 trees<br/>31 leaves
1000 cols | 644.905 | 587.578 | 581.033 | 625.28 | 598.814 | 94.8404 | 58.758 | 127.584 | 58.3206 | 416.228 | ## Percentile metrics for each variant @@ -43,66 +47,100 @@ Some variants above report percentile metrics. Those are reported by computing i | inferencing task config | p50_usecs | p90_usecs | p99_usecs | |:---------------------------------------|------------:|------------:|------------:| -| 10 trees
31 leaves<br/>10 cols | 1.3 | 1.5 | 1.6 |
-| 10 trees<br/>31 leaves<br/>100 cols | 1.8 | 2 | 3.1 |
-| 10 trees<br/>31 leaves<br/>1000 cols | 4.201 | 4.5 | 5.6 |
-| 100 trees<br/>31 leaves<br/>10 cols | 12.6 | 13.8 | 19.1 |
-| 100 trees<br/>31 leaves<br/>100 cols | 9.501 | 10 | 12.802 |
-| 100 trees<br/>31 leaves<br/>1000 cols | 14.301 | 15.601 | 25.001 |
-| 1000 trees<br/>31 leaves<br/>10 cols | 95.1015 | 98.801 | 108.803 |
-| 1000 trees<br/>31 leaves<br/>100 cols | 131.001 | 145.6 | 215.101 |
-| 1000 trees<br/>31 leaves<br/>1000 cols | 142.601 | 145.202 | 157.302 |
-| 5000 trees<br/>31 leaves<br/>10 cols | 383.404 | 430.905 | 584.61 |
-| 5000 trees<br/>31 leaves<br/>100 cols | 448.404 | 504.305 | 633.407 |
-| 5000 trees<br/>31 leaves<br/>1000 cols | 557.003 | 640.203 | 836.145 |
+| 10 trees<br/>31 leaves<br/>10 cols | 1.1 | 1.2 | 1.399 |
+| 10 trees<br/>31 leaves<br/>100 cols | 1.6 | 1.7 | 1.9 |
+| 10 trees<br/>31 leaves<br/>1000 cols | 3.9 | 4.2 | 4.5 |
+| 100 trees<br/>31 leaves<br/>10 cols | 10.3 | 11 | 11.601 |
+| 100 trees<br/>31 leaves<br/>100 cols | 10.2 | 10.7 | 11.1 |
+| 100 trees<br/>31 leaves<br/>1000 cols | 12.601 | 13.001 | 13.6 |
+| 1000 trees<br/>31 leaves<br/>10 cols | 96 | 102.001 | 114.201 |
+| 1000 trees<br/>31 leaves<br/>100 cols | 145.899 | 150.599 | 161.099 |
+| 1000 trees<br/>31 leaves<br/>1000 cols | 139.124 | 142.024 | 154.528 |
+| 5000 trees<br/>31 leaves<br/>10 cols | 405.801 | 424.302 | 444.202 |
+| 5000 trees<br/>31 leaves<br/>100 cols | 464.302 | 476.601 | 490.101 |
+| 5000 trees<br/>31 leaves
1000 cols | 585.368 | 600.169 | 611.8 | ### lightgbm#2 | inferencing task config | p50_usecs | p90_usecs | p99_usecs | |:---------------------------------------|------------:|------------:|------------:| -| 10 trees
31 leaves<br/>10 cols | 1.8 | 2.1 | 2.601 |
-| 10 trees<br/>31 leaves<br/>100 cols | 1.9 | 2 | 2.10001 |
-| 10 trees<br/>31 leaves<br/>1000 cols | 4.7 | 4.901 | 5.4 |
-| 100 trees<br/>31 leaves<br/>10 cols | 13.7 | 15.4 | 37.204 |
-| 100 trees<br/>31 leaves<br/>100 cols | 10.8 | 12.901 | 17.301 |
-| 100 trees<br/>31 leaves<br/>1000 cols | 17.7 | 19.001 | 31.4 |
-| 1000 trees<br/>31 leaves<br/>10 cols | 104.003 | 108.703 | 122.603 |
-| 1000 trees<br/>31 leaves<br/>100 cols | 132.501 | 149.701 | 221.015 |
-| 1000 trees<br/>31 leaves<br/>1000 cols | 138.702 | 160.802 | 219.107 |
-| 5000 trees<br/>31 leaves<br/>10 cols | 425.024 | 463.626 | 496.927 |
-| 5000 trees<br/>31 leaves<br/>100 cols | 508.705 | 588.917 | 946.39 |
-| 5000 trees<br/>31 leaves<br/>1000 cols | 550.905 | 624.606 | 810.269 |
+| 10 trees<br/>31 leaves<br/>10 cols | 1.2 | 1.3 | 1.5 |
+| 10 trees<br/>31 leaves<br/>100 cols | 1.8 | 1.9 | 2.1 |
+| 10 trees<br/>31 leaves<br/>1000 cols | 3.9 | 4.2 | 4.8 |
+| 100 trees<br/>31 leaves<br/>10 cols | 11.1 | 12 | 13.8 |
+| 100 trees<br/>31 leaves<br/>100 cols | 9.3 | 9.601 | 10 |
+| 100 trees<br/>31 leaves<br/>1000 cols | 11.399 | 11.799 | 13.401 |
+| 1000 trees<br/>31 leaves<br/>10 cols | 103.501 | 108.1 | 116.9 |
+| 1000 trees<br/>31 leaves<br/>100 cols | 154.296 | 159.296 | 170.495 |
+| 1000 trees<br/>31 leaves<br/>1000 cols | 149.602 | 152.301 | 164.802 |
+| 5000 trees<br/>31 leaves<br/>10 cols | 372.405 | 389.205 | 405.207 |
+| 5000 trees<br/>31 leaves<br/>100 cols | 481.504 | 496.705 | 510.607 |
+| 5000 trees<br/>31 leaves
1000 cols | 578.888 | 596.699 | 618.387 | ### lightgbm#3 | inferencing task config | p50_usecs | p90_usecs | p99_usecs | |:---------------------------------------|------------:|------------:|------------:| -| 10 trees
31 leaves<br/>10 cols | 1.8 | 2.3 | 3.1 |
-| 10 trees<br/>31 leaves<br/>100 cols | 1.5 | 1.6 | 1.9 |
-| 10 trees<br/>31 leaves<br/>1000 cols | 6.3 | 7.2 | 23.901 |
-| 100 trees<br/>31 leaves<br/>10 cols | 10.8 | 11.6 | 12.6 |
-| 100 trees<br/>31 leaves<br/>100 cols | 14.3 | 15.7 | 29.903 |
-| 100 trees<br/>31 leaves<br/>1000 cols | 15.1 | 16.2 | 27.201 |
-| 1000 trees<br/>31 leaves<br/>10 cols | 85.301 | 109.901 | 168.301 |
-| 1000 trees<br/>31 leaves<br/>100 cols | 132.401 | 149.601 | 201.402 |
-| 1000 trees<br/>31 leaves<br/>1000 cols | 146.202 | 148.903 | 161.503 |
-| 5000 trees<br/>31 leaves<br/>10 cols | 312.703 | 354.715 | 505.311 |
-| 5000 trees<br/>31 leaves<br/>100 cols | 537.638 | 582.651 | 608.343 |
-| 5000 trees<br/>31 leaves<br/>1000 cols | 641.307 | 654.907 | 667.409 |
+| 10 trees<br/>31 leaves<br/>10 cols | 1.1 | 1.3 | 1.5 |
+| 10 trees<br/>31 leaves<br/>100 cols | 1.5 | 1.7 | 1.9 |
+| 10 trees<br/>31 leaves<br/>1000 cols | 4.3 | 4.5 | 4.9 |
+| 100 trees<br/>31 leaves<br/>10 cols | 10.401 | 11.2 | 11.8 |
+| 100 trees<br/>31 leaves<br/>100 cols | 10.601 | 11.001 | 11.401 |
+| 100 trees<br/>31 leaves<br/>1000 cols | 12.601 | 13 | 13.6 |
+| 1000 trees<br/>31 leaves<br/>10 cols | 95.5 | 99 | 108.8 |
+| 1000 trees<br/>31 leaves<br/>100 cols | 149 | 153.8 | 164.202 |
+| 1000 trees<br/>31 leaves<br/>1000 cols | 142.699 | 145.799 | 158.899 |
+| 5000 trees<br/>31 leaves<br/>10 cols | 363.53 | 384.032 | 427.939 |
+| 5000 trees<br/>31 leaves<br/>100 cols | 466.461 | 479.863 | 501.27 |
+| 5000 trees<br/>31 leaves
1000 cols | 622.902 | 637.601 | 650.101 | ### lightgbm#4 | inferencing task config | p50_usecs | p90_usecs | p99_usecs | |:---------------------------------------|------------:|------------:|------------:| -| 10 trees
31 leaves<br/>10 cols | 1.3 | 1.7 | 2.7 |
-| 10 trees<br/>31 leaves<br/>100 cols | 1.8 | 2.2 | 2.6 |
-| 10 trees<br/>31 leaves<br/>1000 cols | 4.7 | 4.9 | 5.3 |
-| 100 trees<br/>31 leaves<br/>10 cols | 15.7 | 17.2 | 34.9 |
-| 100 trees<br/>31 leaves<br/>100 cols | 12.201 | 13.501 | 48.706 |
-| 100 trees<br/>31 leaves<br/>1000 cols | 14.901 | 16.101 | 24.701 |
-| 1000 trees<br/>31 leaves<br/>10 cols | 97.301 | 136.401 | 201.902 |
-| 1000 trees<br/>31 leaves<br/>100 cols | 164.901 | 170.101 | 182.801 |
-| 1000 trees<br/>31 leaves<br/>1000 cols | 148.403 | 151.003 | 166.205 |
-| 5000 trees<br/>31 leaves<br/>10 cols | 439.327 | 492.54 | 602.444 |
-| 5000 trees<br/>31 leaves<br/>100 cols | 439.432 | 490.245 | 605.846 |
-| 5000 trees<br/>31 leaves<br/>1000 cols | 571.902 | 640.112 | 827.614 |
+| 10 trees<br/>31 leaves<br/>10 cols | 1.1 | 1.3 | 1.4 |
+| 10 trees<br/>31 leaves<br/>100 cols | 1.7 | 1.9 | 2 |
+| 10 trees<br/>31 leaves<br/>1000 cols | 4.5 | 4.8 | 5.2 |
+| 100 trees<br/>31 leaves<br/>10 cols | 10.4 | 11.1 | 11.9 |
+| 100 trees<br/>31 leaves<br/>100 cols | 10.4 | 10.8 | 11.3 |
+| 100 trees<br/>31 leaves<br/>1000 cols | 12.798 | 13.099 | 13.598 |
+| 1000 trees<br/>31 leaves<br/>10 cols | 97.302 | 101.201 | 111.002 |
+| 1000 trees<br/>31 leaves<br/>100 cols | 149.489 | 154.29 | 165.188 |
+| 1000 trees<br/>31 leaves<br/>1000 cols | 141.2 | 143.601 | 156.5 |
+| 5000 trees<br/>31 leaves<br/>10 cols | 382.303 | 398.402 | 413.602 |
+| 5000 trees<br/>31 leaves<br/>100 cols | 472.51 | 485.21 | 499.01 |
+| 5000 trees<br/>31 leaves
1000 cols | 596.097 | 611.307 | 625.896 | + +### onnx#5 + +| inferencing task config | p50_usecs | p90_usecs | p99_usecs | +|:---------------------------------------|------------:|------------:|------------:| +| 10 trees
31 leaves<br/>10 cols | 7.51 | 7.6 | 8.88 |
+| 10 trees<br/>31 leaves<br/>100 cols | 7.5998 | 7.6798 | 8.8698 |
+| 10 trees<br/>31 leaves<br/>1000 cols | 7.59 | 7.6901 | 8.91 |
+| 100 trees<br/>31 leaves<br/>10 cols | 12.85 | 13.09 | 14.6201 |
+| 100 trees<br/>31 leaves<br/>100 cols | 12.9402 | 13.6202 | 14.7802 |
+| 100 trees<br/>31 leaves<br/>1000 cols | 11.8401 | 12.09 | 13.4901 |
+| 1000 trees<br/>31 leaves<br/>10 cols | 18.0601 | 19.2001 | 21.3902 |
+| 1000 trees<br/>31 leaves<br/>100 cols | 20.1093 | 21.0993 | 23.7093 |
+| 1000 trees<br/>31 leaves<br/>1000 cols | 19.6325 | 21.1828 | 23.7534 |
+| 5000 trees<br/>31 leaves<br/>10 cols | 43.3894 | 45.2993 | 47.5894 |
+| 5000 trees<br/>31 leaves<br/>100 cols | 104.27 | 111.281 | 118.342 |
+| 5000 trees<br/>31 leaves
1000 cols | 94.5217 | 97.9918 | 101.332 | + +### onnx_multithreaded#7 + +| inferencing task config | p50_usecs | p90_usecs | p99_usecs | +|:---------------------------------------|------------:|------------:|------------:| +| 10 trees
31 leaves<br/>10 cols | 21.8309 | 22.6609 | 24.3009 |
+| 10 trees<br/>31 leaves<br/>100 cols | 21.985 | 23.721 | 45.6108 |
+| 10 trees<br/>31 leaves<br/>1000 cols | 22.2599 | 23.7498 | 25.5398 |
+| 100 trees<br/>31 leaves<br/>10 cols | 26.3017 | 29.4928 | 42.4632 |
+| 100 trees<br/>31 leaves<br/>100 cols | 25.7001 | 28.0201 | 42.1234 |
+| 100 trees<br/>31 leaves<br/>1000 cols | 27.34 | 29.691 | 38.3805 |
+| 1000 trees<br/>31 leaves<br/>10 cols | 36.7701 | 40.1901 | 105.035 |
+| 1000 trees<br/>31 leaves<br/>100 cols | 36.8403 | 39.6914 | 109.843 |
+| 1000 trees<br/>31 leaves<br/>1000 cols | 33.7296 | 36.1596 | 68.4439 |
+| 5000 trees<br/>31 leaves<br/>10 cols | 72.6305 | 91.5047 | 400.135 |
+| 5000 trees<br/>31 leaves<br/>100 cols | 122.421 | 173.977 | 828.446 |
+| 5000 trees<br/>31 leaves
1000 cols | 101.62 | 130.733 | 732.035 | \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 226c6181..e0d7e00c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,6 +8,9 @@ psutil==5.9.3 # frameworks ray==2.0.0 lightgbm-ray==0.1.7 +onnxruntime==1.12.1 +onnxmltools==1.11.1 +onnxconverter-common==1.12.2 lightgbm==3.3.3 treelite==3.0.0 treelite-runtime==3.0.0 diff --git a/src/pipelines/azureml/lightgbm_inferencing.py b/src/pipelines/azureml/lightgbm_inferencing.py index 4a81fe43..9da7934d 100644 --- a/src/pipelines/azureml/lightgbm_inferencing.py +++ b/src/pipelines/azureml/lightgbm_inferencing.py @@ -197,6 +197,29 @@ def inferencing_task_pipeline_function(benchmark_custom_properties, custom_properties = custom_properties ) inferencing_step.runsettings.configure(target=config.compute.linux_cpu) + + elif variant.framework == "lightgbm_ort_batch": + # call module with all the right arguments + inferencing_step = lightgbm_ort_score_module( + data=data, + model=model, + verbose=False, + run_batch=True, + custom_properties=custom_properties, + ) + inferencing_step.runsettings.configure(target=config.compute.linux_cpu) + + elif variant.framework == "lightgbm_ort_multithread_batch": + # call module with all the right arguments + inferencing_step = lightgbm_ort_score_module( + data=data, + model=model, + verbose=False, + run_parallel=True, + run_batch=True, + custom_properties=custom_properties, + ) + inferencing_step.runsettings.configure(target=config.compute.linux_cpu) else: raise NotImplementedError(f"framework {variant.framework} not implemented (yet)") diff --git a/src/scripts/analysis/analyze.py b/src/scripts/analysis/analyze.py index 3d9e0ffe..fbf2ac1c 100644 --- a/src/scripts/analysis/analyze.py +++ b/src/scripts/analysis/analyze.py @@ -1,5 +1,5 @@ # Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. +# Licensed under the MIT license. """ TreeLite/Python inferencing script @@ -131,9 +131,10 @@ def fetch_benchmark_data(self, experiment_id, filter_string): self.logger.info("Fetching Benchmark Runs") # NOTE: returns a pandas dataframe - self.benchmark_data = mlflow.search_runs( - filter_string=filter_string - ) + self.benchmark_data = mlflow.search_runs(filter_string=filter_string) + self.benchmark_data = self.benchmark_data[ + self.benchmark_data.status == "FINISHED" + ] # extract all model information if present if 'tags.benchmark_model' in self.benchmark_data.columns: @@ -160,6 +161,15 @@ def fetch_benchmark_data(self, experiment_id, filter_string): def report_inferencing(self, output_path): """ Uses fetched or load data to produce a reporting for inferencing tasks. 
""" + + # Drop rows which do not specify the time + self.benchmark_data = self.benchmark_data.dropna( + subset=[ + "metrics.time_inferencing", + "dataset_samples", + ] + ) + # create variant readable id self.benchmark_data['variant_id'] = self.benchmark_data['tags.framework'] + "#" + self.benchmark_data['tags.variant_index'] @@ -190,7 +200,6 @@ def report_inferencing(self, output_path): variant_indices_sorted = [ variant_indices[k] for k in variant_indices_sorted_keys ] variants.columns = ['index', 'framework', 'version', 'build', 'cpu count', 'num threads', 'machine', 'system'] - #variants = variants.transpose() # reduce time_inferencing to predict time per request, in micro seconds self.benchmark_data['avg_predict_time_usecs'] = self.benchmark_data['metrics.time_inferencing'].astype(float) / self.benchmark_data['dataset_samples'].astype(int) * 1000000 @@ -202,6 +211,13 @@ def report_inferencing(self, output_path): + self.benchmark_data['model_columns'] + " cols" ) + # Take last measurement per inferencing task config + self.benchmark_data = ( + self.benchmark_data.sort_values("start_time") + .groupby(["inferencing task config", "variant_id"]) + .last() + ).reset_index() + # pivot metrics table metrics = self.benchmark_data.pivot( index=['inferencing task config'], @@ -216,32 +232,38 @@ def report_inferencing(self, output_path): for variant_id in variant_indices_sorted: percentile_metrics_values = ( - self.benchmark_data.loc[self.benchmark_data['variant_id'] == variant_id][[ - 'inferencing task config', - 'variant_id', - 'metrics.batch_time_inferencing_p50_usecs', - 'metrics.batch_time_inferencing_p90_usecs', - 'metrics.batch_time_inferencing_p99_usecs' - ]] + self.benchmark_data.loc[ + self.benchmark_data["variant_id"] == variant_id + ][ + [ + "inferencing task config", + "variant_id", + "metrics.batch_latency_p50_usecs", + "metrics.batch_latency_p90_usecs", + "metrics.batch_latency_p99_usecs", + ] + ] ).dropna() - + if len(percentile_metrics_values) == 0: continue - percentile_metrics = ( - percentile_metrics_values.pivot( - index=['inferencing task config'], - columns=['variant_id'], - values=['metrics.batch_time_inferencing_p50_usecs', 'metrics.batch_time_inferencing_p90_usecs', 'metrics.batch_time_inferencing_p99_usecs'] - ) + percentile_metrics = percentile_metrics_values.pivot( + index=["inferencing task config"], + columns=["variant_id"], + values=[ + "metrics.batch_latency_p50_usecs", + "metrics.batch_latency_p90_usecs", + "metrics.batch_latency_p99_usecs", + ], ) - percentile_metrics.columns = [ col[0].lstrip("metrics.batch_time_inferencing_") for col in percentile_metrics.columns ] + percentile_metrics.columns = [ + col[0].lstrip("metrics.batch_latency_") + for col in percentile_metrics.columns + ] percentile_metrics_reports.append( - { - 'variant_id' : variant_id, - 'report' : percentile_metrics.to_markdown() - } + {"variant_id": variant_id, "report": percentile_metrics.to_markdown()} ) # load the jinja template from local files @@ -297,10 +319,10 @@ def run(args, unknown_args=[]): experiment_id=args.experiment_id, filter_string=f"tags.task = 'score' and tags.benchmark_name = '{args.benchmark_id}'" ) - + if args.data_save: analysis_engine.save_benchmark_data(args.data_save) - + analysis_engine.report_inferencing(args.output) else: diff --git a/src/scripts/inferencing/lightgbm_ort/score.py b/src/scripts/inferencing/lightgbm_ort/score.py index 5ee427e9..4d7d028f 100644 --- a/src/scripts/inferencing/lightgbm_ort/score.py +++ b/src/scripts/inferencing/lightgbm_ort/score.py @@ 
-31,9 +31,15 @@ class LightGBMONNXRTInferecingScript(RunnableScript): def __init__(self): + framework = "onnx" + if "--run_parallel" in sys.argv: + framework += "_multithreaded" + if "--run_batch" in sys.argv: + framework += "_batch" + super().__init__( task="score", - framework="lightgbm", + framework=framework, framework_version="ONNXRT." + str(ort.__version__), ) @@ -95,7 +101,14 @@ def get_arg_parser(cls, parser=None): required=False, default=False, type=bool, - help="number of threads", + help="allows intra sample parallelism", + ) + group_params.add_argument( + "--run_batch", + required=False, + default=False, + type=bool, + help="runs inference in a single batch", ) group_params.add_argument( "--predict_disable_shape_check", @@ -215,22 +228,41 @@ def run(self, args, logger, metrics_logger, unknown_args): time_inferencing_per_query = [] timeit_loops = 10 - for i in range(len(inference_raw_data)): + + if args.run_batch: + batch_length = len(inference_raw_data) prediction_time = timeit.timeit( - lambda: sessionml.run( - [sessionml.get_outputs()[0].name], - {sessionml.get_inputs()[0].name: inference_raw_data[i : i + 1]}, + lambda: sessionml_batch.run( + [sessionml_batch.get_outputs()[0].name], + {sessionml_batch.get_inputs()[0].name: inference_raw_data}, ), number=timeit_loops, ) prediction_time /= timeit_loops - time_inferencing_per_query.append(prediction_time) - metrics_logger.log_metric("time_inferencing", sum(time_inferencing_per_query)) + metrics_logger.log_metric("time_inferencing", prediction_time) + time_inferencing_per_query = [prediction_time] + else: + batch_length = 1 + for i in range(len(inference_raw_data)): + prediction_time = timeit.timeit( + lambda: sessionml.run( + [sessionml.get_outputs()[0].name], + {sessionml.get_inputs()[0].name: inference_raw_data[i : i + 1]}, + ), + number=timeit_loops, + ) + prediction_time /= timeit_loops + time_inferencing_per_query.append(prediction_time) + metrics_logger.log_metric( + "time_inferencing", sum(time_inferencing_per_query) + ) + + logger.info(f"Batch size: {batch_length}") # use helper to log latency with the right metric names metrics_logger.log_inferencing_latencies( time_inferencing_per_query, # only one big batch - batch_length=len(inference_raw_data), + batch_length=batch_length, factor_to_usecs=1000000.0, # values are in seconds ) diff --git a/src/scripts/inferencing/lightgbm_ort/spec.yaml b/src/scripts/inferencing/lightgbm_ort/spec.yaml index f97e94ce..c35157ab 100644 --- a/src/scripts/inferencing/lightgbm_ort/spec.yaml +++ b/src/scripts/inferencing/lightgbm_ort/spec.yaml @@ -1,6 +1,6 @@ $schema: http://azureml/sdk-2-0/CommandComponent.json name: lightgbm_ort_score -version: 0.0.2 +version: 0.0.3 display_name: "LightGBM Inferencing (ONNX RT)" type: CommandComponent description: "LightGBM inferencing using the ONNX Runtime." 
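A caveat on the boolean inputs wired up in the next hunk: `score.py` parses `--run_parallel` and `--run_batch` with `type=bool`, and `bool("False")` is truthy, so passing `False` through the component command still enables the flag. A standalone sketch of the pitfall and of the `strtobool` pattern the script already uses for `--predict_disable_shape_check` (hypothetical flag names, not part of the patch):

```python
# Hedged sketch: plain argparse semantics, independent of this component.
import argparse
from distutils.util import strtobool

parser = argparse.ArgumentParser()
# type=bool converts the raw string, and any non-empty string is truthy
parser.add_argument("--flag_bool", type=bool, default=False)
# strtobool maps "true/false/yes/no/1/0" onto 1/0 as intended
parser.add_argument("--flag_strtobool", type=strtobool, default=False)

args = parser.parse_args(["--flag_bool", "False", "--flag_strtobool", "False"])
assert args.flag_bool is True      # surprising: the string "False" parsed as True
assert args.flag_strtobool == 0    # behaves as the CLI user expects
```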
@@ -24,6 +24,9 @@ inputs: run_parallel: type: Boolean optional: true + run_batch: + type: Boolean + optional: true verbose: type: Boolean default: False @@ -43,6 +46,7 @@ command: >- --output {outputs.predictions} [--num_threads {inputs.n_threads}] [--run_parallel {inputs.run_parallel}] + [--run_batch {inputs.run_batch}] --predict_disable_shape_check {inputs.predict_disable_shape_check} --verbose {inputs.verbose} [--custom_properties {inputs.custom_properties}] From 8730179e13fefa1461d99461f8b0450adee7aba6 Mon Sep 17 00:00:00 2001 From: David Majercak Date: Wed, 19 Oct 2022 14:54:51 -0700 Subject: [PATCH 07/20] delete file --- .../lightgbm-inferencing-prod.yaml | 51 ------------------- 1 file changed, 51 deletions(-) delete mode 100644 conf/experiments/lightgbm-inferencing-prod.yaml diff --git a/conf/experiments/lightgbm-inferencing-prod.yaml b/conf/experiments/lightgbm-inferencing-prod.yaml deleted file mode 100644 index d2898f2d..00000000 --- a/conf/experiments/lightgbm-inferencing-prod.yaml +++ /dev/null @@ -1,51 +0,0 @@ -# This experiment runs multiple variants of lightgbm inferencing + treelite -# on a given user-defined dataset and model -# -# to execute: -# > python src/pipelines/azureml/lightgbm_inferencing.py --exp-config conf/experiments/lightgbm-inferencing.yaml - -defaults: - - aml: custom_prod - - compute: custom_prod - -### CUSTOM PARAMETERS ### - -experiment: - name: "lightgbm_inferencing_prod" - description: "something interesting to say about this" - -run: - submit: true - -lightgbm_inferencing_config: - # name of your particular benchmark - benchmark_name: "benchmark-prod" # override this with a unique name - - # list all the data/model pairs to run inferencing with - tasks: - - data: - name: "NiR4_OFE_FR_NOFF_DATA" - model: - name: "NiR4_OFE_LGBM" - - data: - name: "NiR4_HRS_FR_NOFF_DATA" - model: - name: "NiR4_HRS_LGBM" - - # list all inferencing frameworks and their builds - variants: - # - framework: lightgbm_python # v3.3.0 via pypi - # - framework: lightgbm_c_api # v3.3.0 with C API prediction - # - framework: lightgbm_c_api # v3.3.0 with C API prediction - # build: docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile - # - framework: lightgbm_c_api # v3.2.1 with C API prediction - # build: docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile - # - framework: lightgbm_c_api # v3.2.1 with C API prediction - # build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile - # - framework: lightgbm_ray # ray implementation - - framework: lightgbm_ort # ONNX RT implementation - - framework: treelite_python # v1.3.0 - - # to use custom_win_cli, you need to compile your own binaries - # see src/scripts/inferencing/custom_win_cli/static_binaries/README.md - #- framework: custom_win_cli From a581cd19293c3030a302390b207c412550e6d628 Mon Sep 17 00:00:00 2001 From: David Majercak Date: Wed, 19 Oct 2022 14:59:23 -0700 Subject: [PATCH 08/20] update configs --- conf/experiments/benchmarks/lightgbm-inferencing.yaml | 6 +++--- conf/experiments/lightgbm-inferencing.yaml | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/conf/experiments/benchmarks/lightgbm-inferencing.yaml b/conf/experiments/benchmarks/lightgbm-inferencing.yaml index 262964d8..17bca896 100644 --- a/conf/experiments/benchmarks/lightgbm-inferencing.yaml +++ b/conf/experiments/benchmarks/lightgbm-inferencing.yaml @@ -82,11 +82,11 @@ lightgbm_inferencing_config: build: docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile - framework: lightgbm_c_api # v3.2.1 with C API prediction build: 
docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile - # - framework: lightgbm_ray # ray implementation + - framework: lightgbm_ray # ray implementation - framework: lightgbm_ort # ONNX RT implementation - - framework: lightgbm_ort_batch # ONNX RT implementation + - framework: lightgbm_ort_batch # ONNX RT single batch implementation - framework: lightgbm_ort_multithread # ONNX RT multithreaded implementation - - framework: lightgbm_ort_multithread_batch # ONNX RT multithreaded implementation + - framework: lightgbm_ort_multithread_batch # ONNX RT multithreaded single batch implementation - framework: treelite_python # v1.3.0 # to use custom_win_cli, you need to compile your own binaries diff --git a/conf/experiments/lightgbm-inferencing.yaml b/conf/experiments/lightgbm-inferencing.yaml index f70d2d22..bc5a667c 100644 --- a/conf/experiments/lightgbm-inferencing.yaml +++ b/conf/experiments/lightgbm-inferencing.yaml @@ -40,6 +40,9 @@ lightgbm_inferencing_config: build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile - framework: lightgbm_ray # ray implementation - framework: lightgbm_ort # ONNX RT implementation + - framework: lightgbm_ort_batch # ONNX RT single batch implementation + - framework: lightgbm_ort_multithread # ONNX RT multithreaded implementation + - framework: lightgbm_ort_multithread_batch # ONNX RT multithreaded single batch implementation - framework: treelite_python # v1.3.0 # to use custom_win_cli, you need to compile your own binaries From 8ed2d05fdfacf17477706e1001980d3fb0e78bb5 Mon Sep 17 00:00:00 2001 From: David Majercak Date: Wed, 19 Oct 2022 15:03:07 -0700 Subject: [PATCH 09/20] use timeit instead of monotonic --- src/scripts/inferencing/lightgbm_python/score.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/scripts/inferencing/lightgbm_python/score.py b/src/scripts/inferencing/lightgbm_python/score.py index 94505f5c..257e6603 100644 --- a/src/scripts/inferencing/lightgbm_python/score.py +++ b/src/scripts/inferencing/lightgbm_python/score.py @@ -8,7 +8,7 @@ import sys import argparse import logging -import time +import timeit import numpy as np from distutils.util import strtobool import lightgbm @@ -105,13 +105,21 @@ def run(self, args, logger, metrics_logger, unknown_args): ) logger.info(f"Running .predict()") - batch_start_time = time.monotonic() predictions_array = booster.predict( data=inference_raw_data, num_threads=args.num_threads, - predict_disable_shape_check=bool(args.predict_disable_shape_check) + predict_disable_shape_check=bool(args.predict_disable_shape_check), ) - prediction_time = (time.monotonic() - batch_start_time) + timeit_loops = 10 + prediction_time = timeit.timeit( + lambda: booster.predict( + data=inference_raw_data, + num_threads=args.num_threads, + predict_disable_shape_check=bool(args.predict_disable_shape_check), + ), + number=timeit_loops, + ) + prediction_time /= timeit_loops metrics_logger.log_metric("time_inferencing", prediction_time) # use helper to log latency with the right metric names From 06a12d54d428751bbed4abc3b5dcce49d751d883 Mon Sep 17 00:00:00 2001 From: David Majercak Date: Wed, 19 Oct 2022 15:31:29 -0700 Subject: [PATCH 10/20] use timeit instead of monotonic --- .../inferencing/lightgbm_python/spec.yaml | 2 +- src/scripts/inferencing/lightgbm_ray/score.py | 16 +++++++++++++--- src/scripts/inferencing/lightgbm_ray/spec.yaml | 2 +- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/scripts/inferencing/lightgbm_python/spec.yaml 
b/src/scripts/inferencing/lightgbm_python/spec.yaml index 7fcbebca..05c30f24 100644 --- a/src/scripts/inferencing/lightgbm_python/spec.yaml +++ b/src/scripts/inferencing/lightgbm_python/spec.yaml @@ -1,6 +1,6 @@ $schema: http://azureml/sdk-2-0/CommandComponent.json name: lightgbm_python_score -version: 1.0.1 +version: 1.0.2 display_name: "LightGBM Inferencing (Python API)" type: CommandComponent description: "LightGBM inferencing using the Python API." diff --git a/src/scripts/inferencing/lightgbm_ray/score.py b/src/scripts/inferencing/lightgbm_ray/score.py index 270efc5d..67561c68 100644 --- a/src/scripts/inferencing/lightgbm_ray/score.py +++ b/src/scripts/inferencing/lightgbm_ray/score.py @@ -8,7 +8,7 @@ import sys import argparse import logging -import time +import timeit import numpy as np from distutils.util import strtobool @@ -99,13 +99,23 @@ def run(self, args, logger, metrics_logger, unknown_args): ) logger.info(f"Running .predict()") - batch_start_time = time.monotonic() + predictions_array = lightgbm_ray.predict( booster, inference_data, ray_params=lightgbm_ray.RayParams(num_actors=args.num_threads) ) - prediction_time = (time.monotonic() - batch_start_time) + + timeit_loops = 10 + prediction_time = timeit.timeit( + lambda: lightgbm_ray.predict( + booster, + inference_data, + ray_params=lightgbm_ray.RayParams(num_actors=args.num_threads) + ), + number=timeit_loops, + ) + prediction_time /= timeit_loops metrics_logger.log_metric("time_inferencing", prediction_time) # use helper to log latency with the right metric names diff --git a/src/scripts/inferencing/lightgbm_ray/spec.yaml b/src/scripts/inferencing/lightgbm_ray/spec.yaml index d94c7ce3..482632e5 100644 --- a/src/scripts/inferencing/lightgbm_ray/spec.yaml +++ b/src/scripts/inferencing/lightgbm_ray/spec.yaml @@ -1,6 +1,6 @@ $schema: http://azureml/sdk-2-0/CommandComponent.json name: lightgbm_ray_score -version: 1.0.1 +version: 1.0.2 display_name: "LightGBM Inferencing (Ray)" type: CommandComponent description: "LightGBM inferencing using the Ray Python API." From 9dab9c0bd675f33c010f0f30bc41a39593d28a29 Mon Sep 17 00:00:00 2001 From: David Majercak Date: Fri, 21 Oct 2022 09:43:07 -0700 Subject: [PATCH 11/20] remove run submit true --- conf/experiments/benchmarks/lightgbm-inferencing.yaml | 3 --- conf/experiments/lightgbm-inferencing.yaml | 3 --- 2 files changed, 6 deletions(-) diff --git a/conf/experiments/benchmarks/lightgbm-inferencing.yaml b/conf/experiments/benchmarks/lightgbm-inferencing.yaml index 17bca896..066b1974 100644 --- a/conf/experiments/benchmarks/lightgbm-inferencing.yaml +++ b/conf/experiments/benchmarks/lightgbm-inferencing.yaml @@ -14,9 +14,6 @@ experiment: name: "lightgbm_inferencing_dev" description: "something interesting to say about this" -run: - submit: true - lightgbm_inferencing_config: # name of your particular benchmark benchmark_name: "benchmark-inferencing-20211216.1" # need to be provided at runtime! 
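
Patches 09 and 10 above swap a single time.monotonic() delta for an averaged timeit loop, which damps scheduler and cache noise in the reported latency. The shared idiom condenses to the sketch below, where predict_once is a hypothetical zero-argument wrapper around the framework-specific predict call:

    import timeit

    timeit_loops = 10
    # total wall time over N repeated calls, divided by N: mean seconds per call
    prediction_time = timeit.timeit(predict_once, number=timeit_loops) / timeit_loops

Note that both scripts still issue one untimed predict first, so the timed loop measures warm latency and the resulting predictions are kept for the output step.
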
diff --git a/conf/experiments/lightgbm-inferencing.yaml b/conf/experiments/lightgbm-inferencing.yaml index bc5a667c..940d3ac2 100644 --- a/conf/experiments/lightgbm-inferencing.yaml +++ b/conf/experiments/lightgbm-inferencing.yaml @@ -14,9 +14,6 @@ experiment: name: "lightgbm_inferencing_dev" description: "something interesting to say about this" -run: - submit: true - lightgbm_inferencing_config: # name of your particular benchmark benchmark_name: "benchmark-dev" # override this with a unique name From 30033be54f8b675f23834270dc7fff75591e3425 Mon Sep 17 00:00:00 2001 From: David Majercak Date: Fri, 21 Oct 2022 10:02:20 -0700 Subject: [PATCH 12/20] correct package versions --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index e0d7e00c..79c0ac6d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,8 +21,8 @@ optuna==3.0.3 # pipelines shrike[pipeline]==1.31.10 -azure-core==1.26.0 -azure-storage-blob==12.13.0 +azure-core==1.20.1 +azure-storage-blob==12.11.0 azure-ml-component==0.9.13.post1 azureml-train-core==1.37.0 azureml-dataset-runtime==1.46.0 From be708b1ca663be9f2753e59c2b8e0dcc5a2a3855 Mon Sep 17 00:00:00 2001 From: David Majercak Date: Wed, 2 Nov 2022 11:25:33 -0700 Subject: [PATCH 13/20] update onnx benchmarking --- src/common/tasks.py | 3 ++ src/pipelines/azureml/lightgbm_inferencing.py | 41 ++++--------------- src/scripts/inferencing/lightgbm_ort/score.py | 19 +++++---- .../inferencing/lightgbm_ort/spec.yaml | 2 +- 4 files changed, 23 insertions(+), 42 deletions(-) diff --git a/src/common/tasks.py b/src/common/tasks.py index 95504774..df3d1b25 100644 --- a/src/common/tasks.py +++ b/src/common/tasks.py @@ -26,6 +26,9 @@ class inferencing_task: class inferencing_variants: framework: str = MISSING build: Optional[str] = None + threads: Optional[int] = 1 + batch_exec: Optional[bool] = False + parallel_exec: Optional[bool] = False @dataclass class data_generation_task: diff --git a/src/pipelines/azureml/lightgbm_inferencing.py b/src/pipelines/azureml/lightgbm_inferencing.py index 9da7934d..f272180d 100644 --- a/src/pipelines/azureml/lightgbm_inferencing.py +++ b/src/pipelines/azureml/lightgbm_inferencing.py @@ -183,43 +183,18 @@ def inferencing_task_pipeline_function(benchmark_custom_properties, data = data, model = model, verbose = False, + run_parallel = variant.parallel_exec, + run_batch = variant.batch_exec, + n_threads = variant.threads, custom_properties = custom_properties ) inferencing_step.runsettings.configure(target=config.compute.linux_cpu) - elif variant.framework == "lightgbm_ort_multithread": - # call module with all the right arguments - inferencing_step = lightgbm_ort_score_module( - data = data, - model = model, - verbose = False, - run_parallel = True, - custom_properties = custom_properties - ) - inferencing_step.runsettings.configure(target=config.compute.linux_cpu) - - elif variant.framework == "lightgbm_ort_batch": - # call module with all the right arguments - inferencing_step = lightgbm_ort_score_module( - data=data, - model=model, - verbose=False, - run_batch=True, - custom_properties=custom_properties, - ) - inferencing_step.runsettings.configure(target=config.compute.linux_cpu) - - elif variant.framework == "lightgbm_ort_multithread_batch": - # call module with all the right arguments - inferencing_step = lightgbm_ort_score_module( - data=data, - model=model, - verbose=False, - run_parallel=True, - run_batch=True, - custom_properties=custom_properties, - ) - 
inferencing_step.runsettings.configure(target=config.compute.linux_cpu) + if variant.parallel_exec: + variant_comment.append(f"parallel execution") + if variant.batch_exec: + variant_comment.append(f"batch execution") + variant_comment.append(f"num threads {variant.threads}") else: raise NotImplementedError(f"framework {variant.framework} not implemented (yet)") diff --git a/src/scripts/inferencing/lightgbm_ort/score.py b/src/scripts/inferencing/lightgbm_ort/score.py index 4d7d028f..b5ee24be 100644 --- a/src/scripts/inferencing/lightgbm_ort/score.py +++ b/src/scripts/inferencing/lightgbm_ort/score.py @@ -32,10 +32,12 @@ class LightGBMONNXRTInferecingScript(RunnableScript): def __init__(self): framework = "onnx" - if "--run_parallel" in sys.argv: - framework += "_multithreaded" - if "--run_batch" in sys.argv: + if "--run_parallel" in sys.argv and strtobool(sys.argv[sys.argv.index("--run_parallel") + 1]): + framework += "_parallel" + if "--run_batch" in sys.argv and strtobool(sys.argv[sys.argv.index("--run_batch") + 1]): framework += "_batch" + if "--num_threads" in sys.argv: + framework += f"_threads_{sys.argv[sys.argv.index('--num_threads') + 1]}" super().__init__( task="score", @@ -99,21 +101,21 @@ def get_arg_parser(cls, parser=None): group_params.add_argument( "--run_parallel", required=False, - default=False, - type=bool, + default="False", + type=strtobool, help="allows intra sample parallelism", ) group_params.add_argument( "--run_batch", required=False, - default=False, - type=bool, + default="False", + type=strtobool, help="runs inference in a single batch", ) group_params.add_argument( "--predict_disable_shape_check", required=False, - default=False, + default="False", type=strtobool, help="See LightGBM documentation", ) @@ -188,6 +190,7 @@ def run(self, args, logger, metrics_logger, unknown_args): if args.num_threads > 0: logger.info(f"Setting number of threads to {args.num_threads}") sess_options.intra_op_num_threads = args.num_threads + sess_options.inter_op_num_threads = args.num_threads if args.run_parallel: logger.info(f"Creating multithreaded inference session") diff --git a/src/scripts/inferencing/lightgbm_ort/spec.yaml b/src/scripts/inferencing/lightgbm_ort/spec.yaml index c35157ab..b4fc6530 100644 --- a/src/scripts/inferencing/lightgbm_ort/spec.yaml +++ b/src/scripts/inferencing/lightgbm_ort/spec.yaml @@ -1,6 +1,6 @@ $schema: http://azureml/sdk-2-0/CommandComponent.json name: lightgbm_ort_score -version: 0.0.3 +version: 0.0.6 display_name: "LightGBM Inferencing (ONNX RT)" type: CommandComponent description: "LightGBM inferencing using the ONNX Runtime." From 2a9ef5f0fbb8b315170e11064f91674c14d4bb0f Mon Sep 17 00:00:00 2001 From: David Majercak Date: Mon, 14 Nov 2022 16:28:31 -0800 Subject: [PATCH 14/20] small updates --- Exploration.ipynb | 551 ++++++++++++++++++ .../inferencing/lightgbm_ort/spec.yaml | 4 +- 2 files changed, 553 insertions(+), 2 deletions(-) create mode 100644 Exploration.ipynb diff --git a/Exploration.ipynb b/Exploration.ipynb new file mode 100644 index 00000000..48b06e64 --- /dev/null +++ b/Exploration.ipynb @@ -0,0 +1,551 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import lightgbm\n", + "import timeit" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...4854485548564857485848594860486148624863
0129612.0153000.06000.00.00.0118470648.00.0628.06000.0...0.0132.0140.0137.0135.00.00.00.00.00.0
11200.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
21200.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
3760.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
41200.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\n", + "

5 rows × 4864 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 \\\n", + "0 129 612.0 153000.0 6000.0 0.0 0.0 118470648.0 0.0 628.0 \n", + "1 120 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "2 120 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "3 76 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "4 120 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "\n", + " 9 ... 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 \n", + "0 6000.0 ... 0.0 132.0 140.0 137.0 135.0 0.0 0.0 0.0 0.0 0.0 \n", + "1 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "2 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "3 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "4 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "\n", + "[5 rows x 4864 columns]" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_path = './NiR4_OFE_LGBM_model.txt'\n", + "data_path = './File_0-csv.txt'\n", + "data = pd.read_csv(data_path, header=None)\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[LightGBM] [Info] Construct bin mappers from text data time 2.14 seconds\n" + ] + } + ], + "source": [ + "booster = lightgbm.Booster(model_file=model_path)\n", + "\n", + "inference_data = lightgbm.Dataset(data_path, free_raw_data=False).construct()\n", + "inference_raw_data = inference_data.get_data()" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 1.41436096, -0.27520206, -0.32896408, ..., 0.27021392,\n", + " 0.06719871, 2.11317219])" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predictions_array_lgbmpython = booster.predict(\n", + " data=inference_raw_data,\n", + " num_threads=1,\n", + " predict_disable_shape_check=True,\n", + ")\n", + "predictions_array_lgbmpython" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[13:57:51] D:\\a\\1\\s\\src\\compiler\\ast_native.cc:711: Using ASTNativeCompiler\n", + "[13:57:51] D:\\a\\1\\s\\src\\compiler\\ast\\split.cc:29: Parallel compilation enabled; member trees will be divided into 16 translation units.\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:92: Code generation finished. 
Writing code to files...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu2.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file main.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file header.h...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu5.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu0.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu1.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu3.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu4.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu6.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu7.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu8.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu9.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu10.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu11.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu12.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu13.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu14.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu15.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file recipe.json...\n", + "[13:57:55] c:\\Users\\damajercak\\.conda\\envs\\lightgbmbenchmark\\lib\\site-packages\\treelite\\contrib\\util.py:105: Compiling sources files in directory .\\tmpa3c08ggs into object files (*.obj)...\n", + "[13:58:21] c:\\Users\\damajercak\\.conda\\envs\\lightgbmbenchmark\\lib\\site-packages\\treelite\\contrib\\util.py:135: Generating dynamic shared library .\\tmpa3c08ggs\\predictor.dll...\n", + "[13:58:23] c:\\Users\\damajercak\\.conda\\envs\\lightgbmbenchmark\\lib\\site-packages\\treelite\\contrib\\__init__.py:282: Generated shared library in 28.49 seconds\n" + ] + } + ], + "source": [ + "import treelite, treelite_runtime\n", + "\n", + "model = treelite.Model.load(\n", + " model_path,\n", + " model_format=\"lightgbm\"\n", + ")\n", + "model.export_lib(\n", + " toolchain=\"msvc\",\n", + " libpath=model_path + \".so\",\n", + " verbose=True,\n", + " params={'parallel_comp':16}\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[13:59:32] c:\\Users\\damajercak\\.conda\\envs\\lightgbmbenchmark\\lib\\site-packages\\treelite_runtime\\predictor.py:159: Dynamic shared library c:\\Projects\\lightgbm-benchmark\\NiR4_OFE_LGBM_model.txt.so has been successfully loaded into memory\n" + ] + } + ], + "source": [ + "predictor = treelite_runtime.Predictor(\n", + " model_path + '.so',\n", + " verbose=True,\n", + " nthread=1\n", + ")\n", + "dmat = treelite_runtime.DMatrix(data.to_numpy())" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "predictions_array_treelite =predictor.predict(dmat)" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "The maximum opset needed by this model is only 8.\n", + "The maximum opset needed by this model is only 8.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"[LightGBM] [Info] Construct bin mappers from text data time 2.43 seconds\n" + ] + } + ], + "source": [ + "from onnxconverter_common.data_types import FloatTensorType\n", + "from onnxmltools.convert import convert_lightgbm\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "\n", + "with open(model_path, \"r\") as mf:\n", + " model_str = mf.read()\n", + " model_str = model_str.replace(\n", + " \"objective=lambdarank\", \"objective=regression\"\n", + " )\n", + "booster_ort = lightgbm.Booster(model_str=model_str)\n", + "\n", + "onnx_input_types = [\n", + " (\n", + " \"input\",\n", + " FloatTensorType(\n", + " [1, inference_data.num_feature()]\n", + " ),\n", + " )\n", + "]\n", + "onnx_input_batch_types = [\n", + " (\n", + " \"input\",\n", + " FloatTensorType(\n", + " [inference_data.num_data(), inference_data.num_feature()]\n", + " ),\n", + " )\n", + "]\n", + "onnx_ml_model = convert_lightgbm(booster_ort, initial_types=onnx_input_types)\n", + "onnx_ml_batch_model = convert_lightgbm(booster_ort, initial_types=onnx_input_batch_types)\n", + "\n", + "sess_options = ort.SessionOptions()\n", + "sess_options.intra_op_num_threads = 0\n", + "sess_options.inter_op_num_threads = 0\n", + "\n", + "sess_options.execution_mode = (\n", + " ort.ExecutionMode.ORT_SEQUENTIAL\n", + ")\n", + "sess_options.graph_optimization_level = (\n", + " ort.GraphOptimizationLevel.ORT_ENABLE_ALL\n", + ")\n", + "sessionml = ort.InferenceSession(\n", + " onnx_ml_model.SerializeToString(), sess_options\n", + ")\n", + "sessionml_batch = ort.InferenceSession(\n", + " onnx_ml_batch_model.SerializeToString(), sess_options\n", + ")\n", + "inference_data = lightgbm.Dataset(\n", + " data_path, free_raw_data=False\n", + ").construct()\n", + "inference_raw_data = inference_data.get_data()\n", + "if type(inference_raw_data) == str:\n", + " inference_raw_data = np.loadtxt(\n", + " inference_raw_data, delimiter=\",\"\n", + " ).astype(np.float32)[:, : inference_data.num_feature()]" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "predictions_array_ort = sessionml_batch.run(\n", + " [sessionml.get_outputs()[0].name],\n", + " {sessionml.get_inputs()[0].name: inference_raw_data},\n", + ")[0][:, 0]" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.7666397998491448" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "time_inferencing_per_query = []\n", + "for i in range(len(inference_raw_data)):\n", + " prediction_time = timeit.timeit(\n", + " lambda: sessionml.run(\n", + " [sessionml.get_outputs()[0].name],\n", + " {sessionml.get_inputs()[0].name: inference_raw_data[i : i + 1]},\n", + " ),\n", + " number=1,\n", + " )\n", + " time_inferencing_per_query.append(prediction_time/1)\n", + "sum(time_inferencing_per_query)" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.400464499998634" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "timeit.timeit(lambda: sessionml_batch.run(\n", + " [sessionml.get_outputs()[0].name],\n", + " {sessionml.get_inputs()[0].name: inference_raw_data},\n", + "), number=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 1.41436096 -0.27520206 
-0.32896408 -0.08315643 -0.26660063 -0.30202675\n", + " -0.22120572 -0.35424621 -0.25634644 -0.06725079]\n", + "[ 1.7355299 0.2582493 0.28444618 0.51784474 0.49668223 -0.04218447\n", + " 0.12811233 0.20044815 -0.10399695 0.61548153]\n", + "[ 1.7355288 0.25824943 0.28444648 0.5178444 0.49668252 -0.04218467\n", + " 0.12811226 0.20044814 -0.10399713 0.6154818 ]\n" + ] + } + ], + "source": [ + "print(predictions_array_lgbmpython[:10])\n", + "print(predictions_array_treelite[:10])\n", + "print(predictions_array_ort[:10])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.8.13 ('lightgbmbenchmark')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "218deddc5dc66f2d9cab81f1bf3043b58bb8ede28fae2157142347a8a27e0fa5" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/scripts/inferencing/lightgbm_ort/spec.yaml b/src/scripts/inferencing/lightgbm_ort/spec.yaml index b4fc6530..eb317acc 100644 --- a/src/scripts/inferencing/lightgbm_ort/spec.yaml +++ b/src/scripts/inferencing/lightgbm_ort/spec.yaml @@ -1,10 +1,10 @@ $schema: http://azureml/sdk-2-0/CommandComponent.json name: lightgbm_ort_score -version: 0.0.6 +version: 0.0.7 display_name: "LightGBM Inferencing (ONNX RT)" type: CommandComponent description: "LightGBM inferencing using the ONNX Runtime." -is_deterministic: true +is_deterministic: false inputs: data: type: AnyDirectory From 0136a526532813eda382ca2d9b51e68fa7598b8c Mon Sep 17 00:00:00 2001 From: David Majercak Date: Mon, 12 Dec 2022 09:29:09 -0800 Subject: [PATCH 15/20] update ort inferencing --- src/pipelines/azureml/lightgbm_inferencing.py | 29 ++++++++++--------- .../lightgbm_ort/default.dockerfile | 8 ++--- .../inferencing/lightgbm_ort/spec.yaml | 5 +++- 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/src/pipelines/azureml/lightgbm_inferencing.py b/src/pipelines/azureml/lightgbm_inferencing.py index f272180d..21bbef06 100644 --- a/src/pipelines/azureml/lightgbm_inferencing.py +++ b/src/pipelines/azureml/lightgbm_inferencing.py @@ -8,7 +8,7 @@ > python src/pipelines/azureml/lightgbm_inferencing.py --exp-config conf/experiments/lightgbm-inferencing.yaml """ # pylint: disable=no-member -# NOTE: because it raises 'dict' has no 'outputs' member in dsl.pipeline construction +# NOTE: because it raises "dict" has no "outputs" member in dsl.pipeline construction import os import sys import json @@ -25,8 +25,8 @@ from azure.ml.component.environment import Docker # when running this script directly, needed to import common -LIGHTGBM_REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..')) -SCRIPTS_SOURCES_ROOT = os.path.join(LIGHTGBM_REPO_ROOT, 'src') +LIGHTGBM_REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..")) +SCRIPTS_SOURCES_ROOT = os.path.join(LIGHTGBM_REPO_ROOT, "src") if SCRIPTS_SOURCES_ROOT not in sys.path: logging.info(f"Adding {SCRIPTS_SOURCES_ROOT} to path") @@ -82,7 +82,7 @@ class lightgbm_inferencing_config: # pylint: disable=invalid-name @dsl.pipeline(name=f"lightgbm_inferencing", # pythonic name description=f"LightGBM inferencing on user 
defined dataset/model", - non_pipeline_parameters=['benchmark_custom_properties', 'config']) + non_pipeline_parameters=["benchmark_custom_properties", "config"]) def inferencing_task_pipeline_function(benchmark_custom_properties, config, data, @@ -107,9 +107,9 @@ def inferencing_task_pipeline_function(benchmark_custom_properties, custom_properties = benchmark_custom_properties.copy() custom_properties.update({ # adding build settings (docker) - 'framework_build' : variant.build or "default", + "framework_build" : variant.build or "default", # adding variant_index to spot which variant is the reference - 'variant_index' : variant_index + "variant_index" : variant_index }) # passing as json string that each module parses to digest as tags/properties custom_properties = json.dumps(custom_properties) @@ -152,7 +152,7 @@ def inferencing_task_pipeline_function(benchmark_custom_properties, data = data, model = model, verbose = False, - custom_properties = custom_properties.replace("\"","\\\"") + custom_properties = custom_properties.replace("\"", "\\\"") ) inferencing_step.runsettings.configure(target=config.compute.windows_cpu) @@ -173,7 +173,8 @@ def inferencing_task_pipeline_function(benchmark_custom_properties, data = data, model = model, verbose = False, - custom_properties = custom_properties + custom_properties = custom_properties, + predict_disable_shape_check = predict_disable_shape_check, ) inferencing_step.runsettings.configure(target=config.compute.linux_cpu) @@ -186,7 +187,7 @@ def inferencing_task_pipeline_function(benchmark_custom_properties, data = data, model = model, verbose = False, run_parallel = variant.parallel_exec, run_batch = variant.batch_exec, n_threads = variant.threads, - custom_properties = custom_properties + custom_properties = custom_properties.replace("\"", "\\\"") ) inferencing_step.runsettings.configure(target=config.compute.linux_cpu) @@ -216,7 +217,7 @@ def inferencing_task_pipeline_function(benchmark_custom_properties, # provide step readable display name inferencing_step.node_name = format_run_name(f"inferencing_{variant.framework}_{variant_index}") - # return {key: output}' + # return {key: output} return pipeline_outputs @@ -225,7 +226,7 @@ def inferencing_task_pipeline_function(benchmark_custom_properties, non_pipeline_parameters=["workspace", "config"] # required to use config object ) def inferencing_all_tasks(workspace, config): """Pipeline's main building function.
Args: workspace (azureml.core.Workspace): the AzureML workspace @@ -242,9 +243,9 @@ def inferencing_all_tasks(workspace, config): # create custom properties for this task benchmark_custom_properties = { - 'benchmark_name' : config.lightgbm_inferencing_config.benchmark_name, - 'benchmark_dataset' : inferencing_task.data.name, - 'benchmark_model' : inferencing_task.model.name, + "benchmark_name" : config.lightgbm_inferencing_config.benchmark_name, + "benchmark_dataset" : inferencing_task.data.name, + "benchmark_model" : inferencing_task.model.name, } inferencing_task_subgraph_step = inferencing_task_pipeline_function( diff --git a/src/scripts/inferencing/lightgbm_ort/default.dockerfile b/src/scripts/inferencing/lightgbm_ort/default.dockerfile index 01e5614c..f76ac583 100644 --- a/src/scripts/inferencing/lightgbm_ort/default.dockerfile +++ b/src/scripts/inferencing/lightgbm_ort/default.dockerfile @@ -7,16 +7,16 @@ ARG lightgbm_version="3.3.0" # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH # Install pip dependencies RUN HOROVOD_WITH_TENSORFLOW=1 \ - pip install 'pandas>=1.1,<1.2' \ - 'numpy>=1.10,<1.20' \ - 'matplotlib==3.4.3' \ + pip install 'pandas==1.5.2' \ + 'numpy==1.23.5' \ + 'matplotlib==3.6.2' \ 'scipy~=1.5.0' \ 'scikit-learn~=0.24.1' \ 'azureml-core==1.35.0' \ diff --git a/src/scripts/inferencing/lightgbm_ort/spec.yaml b/src/scripts/inferencing/lightgbm_ort/spec.yaml index eb317acc..d4288abd 100644 --- a/src/scripts/inferencing/lightgbm_ort/spec.yaml +++ b/src/scripts/inferencing/lightgbm_ort/spec.yaml @@ -40,6 +40,9 @@ outputs: type: AnyDirectory command: >- + apt update -y && + apt install numactl -y && + numactl -m 0 -N 0 -- python score.py --data {inputs.data} --model {inputs.model} @@ -49,7 +52,7 @@ command: >- [--run_batch {inputs.run_batch}] --predict_disable_shape_check {inputs.predict_disable_shape_check} --verbose {inputs.verbose} - [--custom_properties {inputs.custom_properties}] + [--custom_properties "{inputs.custom_properties}"] --cluster_auto_setup True environment: From 6a740f19b428425d51707905ca0a5d8ab7269fbb Mon Sep 17 00:00:00 2001 From: David Majercak Date: Mon, 12 Dec 2022 09:34:11 -0800 Subject: [PATCH 16/20] pass predict_disable_shape_check for ray --- src/scripts/inferencing/lightgbm_ray/score.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/scripts/inferencing/lightgbm_ray/score.py b/src/scripts/inferencing/lightgbm_ray/score.py index 67561c68..105725ef 100644 --- a/src/scripts/inferencing/lightgbm_ray/score.py +++ b/src/scripts/inferencing/lightgbm_ray/score.py @@ -103,7 +103,8 @@ def run(self, args, logger, metrics_logger, unknown_args): predictions_array = lightgbm_ray.predict( booster, inference_data, - ray_params=lightgbm_ray.RayParams(num_actors=args.num_threads) + ray_params=lightgbm_ray.RayParams(num_actors=args.num_threads), + predict_disable_shape_check=bool(args.predict_disable_shape_check), ) timeit_loops = 10 @@ -111,7 +112,8 @@ def run(self, args, logger, metrics_logger, unknown_args): lambda: lightgbm_ray.predict( booster, inference_data, - ray_params=lightgbm_ray.RayParams(num_actors=args.num_threads) + ray_params=lightgbm_ray.RayParams(num_actors=args.num_threads), + predict_disable_shape_check=bool(args.predict_disable_shape_check), ), number=timeit_loops, ) From c281d275315ff4a43105986e2cfc2264ef489c8d Mon Sep 17 00:00:00 2001 From: David 
Majercak Date: Mon, 12 Dec 2022 09:39:37 -0800 Subject: [PATCH 17/20] update docker images and pip --- .github/workflows/azureml_pipelines.yml | 2 +- .github/workflows/benchmark_scripts.yml | 2 +- .github/workflows/docs.yml | 2 +- docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile | 4 ++-- docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile | 4 ++-- docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile | 4 ++-- docker/lightgbm-v3.2.1/linux_cpu_mpi_pip.dockerfile | 4 ++-- docker/lightgbm-v3.2.1/linux_cuda_build.dockerfile | 4 ++-- docker/lightgbm-v3.2.1/linux_gpu_build.dockerfile | 4 ++-- docker/lightgbm-v3.2.1/linux_gpu_pip.dockerfile | 4 ++-- docker/lightgbm-v3.2.1/windows_cpu_pip.dockerfile | 2 +- docker/lightgbm-v3.3.0/linux_cpu_mpi_build.dockerfile | 4 ++-- docker/lightgbm-v3.3.0/linux_cpu_mpi_pip.dockerfile | 2 +- docker/lightgbm-v3.3.0/windows_cpu_pip.dockerfile | 2 +- src/scripts/data_processing/generate_data/conda_env.yaml | 2 +- .../data_processing/lightgbm_data2bin/conda_env.yml | 2 +- src/scripts/data_processing/partition_data/conda_env.yml | 2 +- src/scripts/inferencing/custom_win_cli/conda_env.yaml | 2 +- src/scripts/inferencing/lightgbm_c_api/default.dockerfile | 4 ++-- src/scripts/inferencing/lightgbm_python/default.dockerfile | 4 ++-- src/scripts/inferencing/lightgbm_ray/default.dockerfile | 7 ++++--- src/scripts/inferencing/treelite_python/conda_env.yaml | 2 +- .../model_transformation/treelite_compile/conda_env.yaml | 2 +- src/scripts/sample/conda_env.yaml | 2 +- src/scripts/training/lightgbm_python/default.dockerfile | 2 +- src/scripts/training/lightgbm_ray/default.dockerfile | 2 +- src/scripts/training/ray_tune/default.dockerfile | 2 +- .../training/ray_tune_distributed/default.dockerfile | 2 +- 28 files changed, 41 insertions(+), 40 deletions(-) diff --git a/.github/workflows/azureml_pipelines.yml b/.github/workflows/azureml_pipelines.yml index 2efa112e..364b4cc0 100644 --- a/.github/workflows/azureml_pipelines.yml +++ b/.github/workflows/azureml_pipelines.yml @@ -34,7 +34,7 @@ jobs: - name: Install dependencies run: | - python -m pip install --upgrade pip==21.3.1 + python -m pip install --upgrade pip==22.2.2 pip install flake8==3.9.1 pytest~=6.2 pytest-cov~=2.11 sudo apt-get install libopenmpi-dev if [ -f requirements.txt ]; then pip install -r requirements.txt; fi diff --git a/.github/workflows/benchmark_scripts.yml b/.github/workflows/benchmark_scripts.yml index 35b88423..75f2e1f6 100644 --- a/.github/workflows/benchmark_scripts.yml +++ b/.github/workflows/benchmark_scripts.yml @@ -42,7 +42,7 @@ jobs: - name: Install dependencies run: | sudo apt-get install libopenmpi-dev - python -m pip install --upgrade pip==21.3.1 + python -m pip install --upgrade pip==22.2.2 pip install flake8==3.9.1 pytest~=6.2 pytest-cov~=2.11 if [ -f requirements.txt ]; then pip install -r requirements.txt; fi # hotfix for azurecli issue diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 2ce8b02d..9ed1a320 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -32,7 +32,7 @@ jobs: - name: pip install run: | - python -m pip install --upgrade pip==21.3.1 + python -m pip install --upgrade pip==22.2.2 python -m pip install markdown-include==0.7.0 mkdocstrings==0.19.0 mkdocstrings-python==0.7.1 mkdocs-material==8.4.2 livereload==2.6.3 diff --git a/docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile b/docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile index d8b46623..7f3748d3 100644 --- 
a/docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile +++ b/docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile @@ -1,4 +1,4 @@ -FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210615.v1 +FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221129.v1 LABEL lightgbmbenchmark.linux.cpu.mpi.build.version="3.2.1-patch/20211109.1" # Those arguments will NOT be used by AzureML @@ -54,7 +54,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile b/docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile index 7041380f..a7233019 100644 --- a/docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile +++ b/docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile @@ -1,4 +1,4 @@ -FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210615.v1 +FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221129.v1 LABEL lightgbmbenchmark.linux.cpu.mpi.build.version="3.3.0-patch/20211109.1" # Those arguments will NOT be used by AzureML @@ -54,7 +54,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile b/docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile index 67eb7d88..6c05ad28 100644 --- a/docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile +++ b/docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile @@ -1,4 +1,4 @@ -FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20211012.v1 +FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221129.v1 LABEL lightgbmbenchmark.linux.cpu.mpi.build.version="3.2.1/20211109.1" # Those arguments will NOT be used by AzureML @@ -49,7 +49,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/docker/lightgbm-v3.2.1/linux_cpu_mpi_pip.dockerfile b/docker/lightgbm-v3.2.1/linux_cpu_mpi_pip.dockerfile index 7f415f76..b42984f0 100644 --- a/docker/lightgbm-v3.2.1/linux_cpu_mpi_pip.dockerfile +++ b/docker/lightgbm-v3.2.1/linux_cpu_mpi_pip.dockerfile @@ -1,4 +1,4 @@ -FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210615.v1 +FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221129.v1 LABEL lightgbmbenchmark.linux.cpu.mpi.pip.version="3.2.1/20211108.1" # Those arguments will NOT be used by AzureML @@ -10,7 +10,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/docker/lightgbm-v3.2.1/linux_cuda_build.dockerfile b/docker/lightgbm-v3.2.1/linux_cuda_build.dockerfile index 1f85db8b..58104eb0 100644 --- a/docker/lightgbm-v3.2.1/linux_cuda_build.dockerfile +++ b/docker/lightgbm-v3.2.1/linux_cuda_build.dockerfile @@ -1,4 +1,4 @@ -FROM 
mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04 +FROM mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.6-cudnn8-ubuntu20.04:20221129.v1 LABEL lightgbmbenchmark.linux.cuda.build.version="3.2.1/20211108.1" # Those arguments will NOT be used by AzureML @@ -73,7 +73,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/docker/lightgbm-v3.2.1/linux_gpu_build.dockerfile b/docker/lightgbm-v3.2.1/linux_gpu_build.dockerfile index e3d5cc01..047f985a 100644 --- a/docker/lightgbm-v3.2.1/linux_gpu_build.dockerfile +++ b/docker/lightgbm-v3.2.1/linux_gpu_build.dockerfile @@ -1,4 +1,4 @@ -FROM mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04 +FROM mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.6-cudnn8-ubuntu20.04:20221129.v1 LABEL lightgbmbenchmark.linux.gpu.build.version="3.2.1/20211108.1" # Those arguments will NOT be used by AzureML @@ -10,7 +10,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/docker/lightgbm-v3.2.1/linux_gpu_pip.dockerfile b/docker/lightgbm-v3.2.1/linux_gpu_pip.dockerfile index a8e6dea7..4cba3571 100644 --- a/docker/lightgbm-v3.2.1/linux_gpu_pip.dockerfile +++ b/docker/lightgbm-v3.2.1/linux_gpu_pip.dockerfile @@ -1,4 +1,4 @@ -FROM mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04 +FROM mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.6-cudnn8-ubuntu20.04:20221129.v1 LABEL lightgbmbenchmark.linux.gpu.pip.version="3.2.1/20211108.1" # Those arguments will NOT be used by AzureML # they are here just to allow for lightgbm-benchmark build to actually check @@ -9,7 +9,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/docker/lightgbm-v3.2.1/windows_cpu_pip.dockerfile b/docker/lightgbm-v3.2.1/windows_cpu_pip.dockerfile index cf9f2a6b..9e984465 100644 --- a/docker/lightgbm-v3.2.1/windows_cpu_pip.dockerfile +++ b/docker/lightgbm-v3.2.1/windows_cpu_pip.dockerfile @@ -10,7 +10,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment #ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/docker/lightgbm-v3.3.0/linux_cpu_mpi_build.dockerfile b/docker/lightgbm-v3.3.0/linux_cpu_mpi_build.dockerfile index 4f66f5a0..09b75851 100644 --- a/docker/lightgbm-v3.3.0/linux_cpu_mpi_build.dockerfile +++ b/docker/lightgbm-v3.3.0/linux_cpu_mpi_build.dockerfile @@ -1,4 +1,4 @@ -FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20211012.v1 +FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221129.v1 LABEL lightgbmbenchmark.linux.cpu.mpi.build.version="3.3.0/20211115.1" # Those arguments will NOT be used by AzureML @@ -49,7 +49,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p 
$AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/docker/lightgbm-v3.3.0/linux_cpu_mpi_pip.dockerfile b/docker/lightgbm-v3.3.0/linux_cpu_mpi_pip.dockerfile index 59ca3f32..e0d9056c 100644 --- a/docker/lightgbm-v3.3.0/linux_cpu_mpi_pip.dockerfile +++ b/docker/lightgbm-v3.3.0/linux_cpu_mpi_pip.dockerfile @@ -10,7 +10,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/docker/lightgbm-v3.3.0/windows_cpu_pip.dockerfile b/docker/lightgbm-v3.3.0/windows_cpu_pip.dockerfile index f40508e0..4d974d65 100644 --- a/docker/lightgbm-v3.3.0/windows_cpu_pip.dockerfile +++ b/docker/lightgbm-v3.3.0/windows_cpu_pip.dockerfile @@ -10,7 +10,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment #ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/src/scripts/data_processing/generate_data/conda_env.yaml b/src/scripts/data_processing/generate_data/conda_env.yaml index 223c34f7..d454ecdb 100644 --- a/src/scripts/data_processing/generate_data/conda_env.yaml +++ b/src/scripts/data_processing/generate_data/conda_env.yaml @@ -3,7 +3,7 @@ channels: - defaults dependencies: - python=3.8 -- pip=20.0 +- pip=22.2.2 - pip: - numpy==1.21.2 - scikit-learn==0.24.2 diff --git a/src/scripts/data_processing/lightgbm_data2bin/conda_env.yml b/src/scripts/data_processing/lightgbm_data2bin/conda_env.yml index 2c403f37..b7f65aba 100644 --- a/src/scripts/data_processing/lightgbm_data2bin/conda_env.yml +++ b/src/scripts/data_processing/lightgbm_data2bin/conda_env.yml @@ -3,7 +3,7 @@ channels: - defaults dependencies: - python=3.8 -- pip=20.0 +- pip=22.2.2 - pip: - numpy==1.21.2 - scikit-learn==0.24.2 diff --git a/src/scripts/data_processing/partition_data/conda_env.yml b/src/scripts/data_processing/partition_data/conda_env.yml index 39dabefc..395ab493 100644 --- a/src/scripts/data_processing/partition_data/conda_env.yml +++ b/src/scripts/data_processing/partition_data/conda_env.yml @@ -3,7 +3,7 @@ channels: - defaults dependencies: - python=3.8 -- pip=20.0 +- pip=22.2.2 - pip: - numpy==1.21.2 - scikit-learn==0.24.2 diff --git a/src/scripts/inferencing/custom_win_cli/conda_env.yaml b/src/scripts/inferencing/custom_win_cli/conda_env.yaml index 78eed94f..cd181b8e 100644 --- a/src/scripts/inferencing/custom_win_cli/conda_env.yaml +++ b/src/scripts/inferencing/custom_win_cli/conda_env.yaml @@ -3,7 +3,7 @@ channels: - defaults dependencies: - python=3.8 -- pip=20.0 +- pip=22.2.2 - pip: - azureml-defaults==1.35.0 - azureml-mlflow==1.35.0 diff --git a/src/scripts/inferencing/lightgbm_c_api/default.dockerfile b/src/scripts/inferencing/lightgbm_c_api/default.dockerfile index f50022c5..4f22ca78 100644 --- a/src/scripts/inferencing/lightgbm_c_api/default.dockerfile +++ b/src/scripts/inferencing/lightgbm_c_api/default.dockerfile @@ -1,4 +1,4 @@ -FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20211012.v1 +FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221129.v1 LABEL lightgbmbenchmark.linux.cpu.mpi.build.version="3.3.0/20211115.1" # Those arguments will NOT be 
used by AzureML @@ -49,7 +49,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/src/scripts/inferencing/lightgbm_python/default.dockerfile b/src/scripts/inferencing/lightgbm_python/default.dockerfile index 419a5444..6ceda711 100644 --- a/src/scripts/inferencing/lightgbm_python/default.dockerfile +++ b/src/scripts/inferencing/lightgbm_python/default.dockerfile @@ -1,4 +1,4 @@ -FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210615.v1 +FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221129.v1 LABEL lightgbmbenchmark.linux.cpu.mpi.pip.version="3.3.0/20211210.1" # Those arguments will NOT be used by AzureML @@ -10,7 +10,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/src/scripts/inferencing/lightgbm_ray/default.dockerfile b/src/scripts/inferencing/lightgbm_ray/default.dockerfile index 67a87a2b..4cea04e3 100644 --- a/src/scripts/inferencing/lightgbm_ray/default.dockerfile +++ b/src/scripts/inferencing/lightgbm_ray/default.dockerfile @@ -5,15 +5,15 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH # Install pip dependencies RUN HOROVOD_WITH_TENSORFLOW=1 \ - pip install 'pandas>=1.1,<1.2' \ - 'numpy>=1.10,<1.20' \ + pip install 'pandas==1.5.2' \ + 'numpy==1.23.5' \ 'matplotlib==3.4.3' \ 'scipy~=1.5.0' \ 'scikit-learn~=0.24.1' \ @@ -23,4 +23,5 @@ RUN HOROVOD_WITH_TENSORFLOW=1 \ 'azureml-telemetry==1.35.0' \ 'mpi4py==3.1.1' \ 'ray==1.9.2' \ + 'protobuf==3.20' \ 'lightgbm-ray==0.1.2' diff --git a/src/scripts/inferencing/treelite_python/conda_env.yaml b/src/scripts/inferencing/treelite_python/conda_env.yaml index b31a7368..3d04b774 100644 --- a/src/scripts/inferencing/treelite_python/conda_env.yaml +++ b/src/scripts/inferencing/treelite_python/conda_env.yaml @@ -3,7 +3,7 @@ channels: - defaults dependencies: - python=3.8 -- pip=20.0 +- pip=22.2.2 - pip: - azureml-defaults==1.35.0 - azureml-mlflow==1.35.0 diff --git a/src/scripts/model_transformation/treelite_compile/conda_env.yaml b/src/scripts/model_transformation/treelite_compile/conda_env.yaml index b31a7368..3d04b774 100644 --- a/src/scripts/model_transformation/treelite_compile/conda_env.yaml +++ b/src/scripts/model_transformation/treelite_compile/conda_env.yaml @@ -3,7 +3,7 @@ channels: - defaults dependencies: - python=3.8 -- pip=20.0 +- pip=22.2.2 - pip: - azureml-defaults==1.35.0 - azureml-mlflow==1.35.0 diff --git a/src/scripts/sample/conda_env.yaml b/src/scripts/sample/conda_env.yaml index 0201788d..be0745eb 100644 --- a/src/scripts/sample/conda_env.yaml +++ b/src/scripts/sample/conda_env.yaml @@ -3,7 +3,7 @@ channels: - defaults dependencies: - python=3.8 -- pip=20.0 +- pip=22.2.2 - pip: - azureml-defaults==1.35.0 - azureml-mlflow==1.35.0 diff --git a/src/scripts/training/lightgbm_python/default.dockerfile b/src/scripts/training/lightgbm_python/default.dockerfile index 6848faf6..8c12a696 100644 
--- a/src/scripts/training/lightgbm_python/default.dockerfile +++ b/src/scripts/training/lightgbm_python/default.dockerfile @@ -31,7 +31,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/src/scripts/training/lightgbm_ray/default.dockerfile b/src/scripts/training/lightgbm_ray/default.dockerfile index 67a87a2b..0f0db1aa 100644 --- a/src/scripts/training/lightgbm_ray/default.dockerfile +++ b/src/scripts/training/lightgbm_ray/default.dockerfile @@ -5,7 +5,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/src/scripts/training/ray_tune/default.dockerfile b/src/scripts/training/ray_tune/default.dockerfile index fcf659e6..350d9fd6 100644 --- a/src/scripts/training/ray_tune/default.dockerfile +++ b/src/scripts/training/ray_tune/default.dockerfile @@ -7,7 +7,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/src/scripts/training/ray_tune_distributed/default.dockerfile b/src/scripts/training/ray_tune_distributed/default.dockerfile index 5bb04a04..acba0893 100644 --- a/src/scripts/training/ray_tune_distributed/default.dockerfile +++ b/src/scripts/training/ray_tune_distributed/default.dockerfile @@ -7,7 +7,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH From 3a36ce64c8e060c806690e9eb9286916fe315c94 Mon Sep 17 00:00:00 2001 From: David Majercak Date: Mon, 12 Dec 2022 13:12:28 -0800 Subject: [PATCH 18/20] update ort docker deps --- src/scripts/inferencing/lightgbm_ort/default.dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/scripts/inferencing/lightgbm_ort/default.dockerfile b/src/scripts/inferencing/lightgbm_ort/default.dockerfile index f76ac583..caec0a91 100644 --- a/src/scripts/inferencing/lightgbm_ort/default.dockerfile +++ b/src/scripts/inferencing/lightgbm_ort/default.dockerfile @@ -1,5 +1,4 @@ FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest -LABEL lightgbmbenchmark.linux.cpu.ray.version="0.1.2/20220111.1" ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm @@ -30,4 +29,4 @@ RUN HOROVOD_WITH_TENSORFLOW=1 \ # install lightgbm with mpi RUN pip install lightgbm==${lightgbm_version} \ - pip install 'protobuf==3.20' \ No newline at end of file + pip install 'protobuf==3.20.3' \ No newline at end of file From c86e0982c68180ce2ab2fd99dc929af15139dc24 Mon Sep 17 00:00:00 2001 From: David Majercak Date: Mon, 12 Dec 2022 13:14:09 -0800 Subject: [PATCH 19/20] update ray docker deps --- src/scripts/inferencing/lightgbm_ray/default.dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/scripts/inferencing/lightgbm_ray/default.dockerfile 
index 4cea04e3..10b639dd 100644
--- a/src/scripts/inferencing/lightgbm_ray/default.dockerfile
+++ b/src/scripts/inferencing/lightgbm_ray/default.dockerfile
@@ -22,6 +22,6 @@ RUN HOROVOD_WITH_TENSORFLOW=1 \
     'azureml-mlflow==1.35.0' \
     'azureml-telemetry==1.35.0' \
     'mpi4py==3.1.1' \
-    'ray==1.9.2' \
-    'protobuf==3.20' \
-    'lightgbm-ray==0.1.2'
+    'protobuf==3.20.3' \
+    'ray==2.1.0' \
+    'lightgbm-ray==0.1.8'

From 75103fdff287032d77fc31b8b8f2104b710244a2 Mon Sep 17 00:00:00 2001
From: David Majercak
Date: Tue, 13 Dec 2022 23:54:13 -0800
Subject: [PATCH 20/20] only essential python packages

---
 .../lightgbm_ort/default.dockerfile           | 21 +++-----
 src/scripts/inferencing/lightgbm_ort/score.py | 49 +++++++------------
 2 files changed, 24 insertions(+), 46 deletions(-)

diff --git a/src/scripts/inferencing/lightgbm_ort/default.dockerfile b/src/scripts/inferencing/lightgbm_ort/default.dockerfile
index caec0a91..e161bfbb 100644
--- a/src/scripts/inferencing/lightgbm_ort/default.dockerfile
+++ b/src/scripts/inferencing/lightgbm_ort/default.dockerfile
@@ -1,8 +1,8 @@
-FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest
+FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221129.v1
 
 ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm
 
-ARG lightgbm_version="3.3.0"
+ARG lightgbm_version="3.3.3"
 
 # Create conda environment
 RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
@@ -12,20 +12,13 @@ RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
 ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH
 
 # Install pip dependencies
-RUN HOROVOD_WITH_TENSORFLOW=1 \
-    pip install 'pandas==1.5.2' \
-    'numpy==1.23.5' \
+RUN pip install 'numpy==1.23.5' \
     'matplotlib==3.6.2' \
-    'scipy~=1.5.0' \
-    'scikit-learn~=0.24.1' \
-    'azureml-core==1.35.0' \
-    'azureml-defaults==1.35.0' \
-    'azureml-mlflow==1.35.0' \
-    'azureml-telemetry==1.35.0' \
-    'mpi4py==3.1.1' \
-    'onnxruntime==1.12.1' \
+    'psutil==5.9.4'\
+    'azureml-mlflow==1.48.0' \
+    'onnxruntime==1.13.1' \
     'onnxmltools==1.11.1' \
-    'onnxconverter-common==1.12.2'
+    'onnxconverter-common==1.13.0'
 
 # install lightgbm with mpi
 RUN pip install lightgbm==${lightgbm_version} \
diff --git a/src/scripts/inferencing/lightgbm_ort/score.py b/src/scripts/inferencing/lightgbm_ort/score.py
index b5ee24be..f6e585c9 100644
--- a/src/scripts/inferencing/lightgbm_ort/score.py
+++ b/src/scripts/inferencing/lightgbm_ort/score.py
@@ -6,9 +6,7 @@
 """
 import os
 import sys
-import argparse
 import logging
-import time
 import timeit
 import numpy as np
 from distutils.util import strtobool
@@ -155,34 +153,23 @@ def run(self, args, logger, metrics_logger, unknown_args):
         assert args.data_format == "CSV"
         with metrics_logger.log_time_block("time_data_loading"):
             # NOTE: this is bad, but allows for libsvm format (not just numpy)
-            inference_data = lightgbm.Dataset(
-                args.data, free_raw_data=False
-            ).construct()
-            inference_raw_data = inference_data.get_data()
-            if type(inference_raw_data) == str:
-                inference_raw_data = np.loadtxt(
-                    inference_raw_data, delimiter=","
-                ).astype(np.float32)[:, : inference_data.num_feature()]
+            # inference_data = lightgbm.Dataset(
+            #     args.data, free_raw_data=False
+            # ).construct()
+            # inference_raw_data = inference_data.get_data()
+            # if type(inference_raw_data) == str:
+            inference_raw_data = np.loadtxt(
+                args.data, delimiter=","
+            ).astype(np.float32)[:, : booster.num_feature()]
 
         logger.info(f"Converting model to ONNX")
         onnx_input_types = [
             (
                 "input",
-                FloatTensorType([1, inference_data.num_feature()]),
-            )
-        ]
-        onnx_batch_input_types = [
-            (
-                "input",
-                FloatTensorType(
-                    [inference_data.num_data(), inference_data.num_feature()]
-                ),
+                FloatTensorType([None, inference_raw_data.shape[1]]),
             )
         ]
         onnx_ml_model = convert_lightgbm(booster, initial_types=onnx_input_types)
-        onnx_ml_batch_model = convert_lightgbm(
-            booster, initial_types=onnx_batch_input_types
-        )
 
         logger.info(f"Creating inference session")
         sess_options = ort.SessionOptions()
@@ -206,14 +193,11 @@ def run(self, args, logger, metrics_logger, unknown_args):
             sessionml = ort.InferenceSession(
                 onnx_ml_model.SerializeToString(), sess_options
             )
-            sessionml_batch = ort.InferenceSession(
-                onnx_ml_batch_model.SerializeToString(), sess_options
-            )
 
         # capture data shape as property
         metrics_logger.set_properties(
-            inference_data_length=inference_data.num_data(),
-            inference_data_width=inference_data.num_feature(),
+            inference_data_length=inference_raw_data.shape[0],
+            inference_data_width=inference_raw_data.shape[1],
         )
 
         logger.info(f"Running .predict()")
@@ -223,8 +207,9 @@ def run(self, args, logger, metrics_logger, unknown_args):
             sessionml.run(
                 [sessionml.get_outputs()[0].name],
                 {sessionml.get_inputs()[0].name: inference_raw_data[0:1]},
-            )[0]
-            predictions_array = sessionml_batch.run(
+            )
+
+            predictions_array = sessionml.run(
                 [sessionml.get_outputs()[0].name],
                 {sessionml.get_inputs()[0].name: inference_raw_data},
             )[0]
@@ -235,9 +220,9 @@ def run(self, args, logger, metrics_logger, unknown_args):
         if args.run_batch:
             batch_length = len(inference_raw_data)
             prediction_time = timeit.timeit(
-                lambda: sessionml_batch.run(
-                    [sessionml_batch.get_outputs()[0].name],
-                    {sessionml_batch.get_inputs()[0].name: inference_raw_data},
+                lambda: sessionml.run(
+                    [sessionml.get_outputs()[0].name],
+                    {sessionml.get_inputs()[0].name: inference_raw_data},
                 ),
                 number=timeit_loops,
             )
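
Note on the dependency commits: PATCH 17-19 converge every image and conda environment on the same pins (pip 22.2.2, numpy 1.23.5, protobuf 3.20.3), and PATCH 20/20 trims the ort image down to what score.py actually imports. A quick way to confirm a built image matches its pins is to compare installed versions against a table copied from the dockerfile. The sketch below is illustrative only (not part of the patches); the EXPECTED table mirrors the lightgbm_ort dockerfile after PATCH 20/20, and importlib.metadata is stdlib in the pinned Python 3.8.

    from importlib.metadata import version  # stdlib, Python 3.8+

    # expected pins, copied from src/scripts/inferencing/lightgbm_ort/default.dockerfile
    EXPECTED = {
        "numpy": "1.23.5",
        "psutil": "5.9.4",
        "onnxruntime": "1.13.1",
        "onnxmltools": "1.11.1",
        "onnxconverter-common": "1.13.0",
        "protobuf": "3.20.3",
    }

    for pkg, want in EXPECTED.items():
        got = version(pkg)  # raises PackageNotFoundError if pkg is absent
        print(f"{pkg}=={got}", "ok" if got == want else f"MISMATCH, expected {want}")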
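
Note on the score.py change in PATCH 20/20: the two fixed-shape conversions (a [1, n] model for single-row scoring and a [num_data, n] model for batch scoring) collapse into one model whose batch axis is dynamic, FloatTensorType([None, n]), so a single InferenceSession serves both paths. A minimal end-to-end sketch of that pattern follows; the toy dataset, parameters, and variable names are illustrative, not taken from the benchmark code.

    import numpy as np
    import lightgbm
    import onnxruntime as ort
    from onnxmltools.convert import convert_lightgbm
    from onnxconverter_common.data_types import FloatTensorType

    # train a small throwaway booster on random data
    X = np.random.rand(256, 10).astype(np.float32)
    y = np.random.rand(256)
    booster = lightgbm.train(
        {"objective": "regression", "verbose": -1},
        lightgbm.Dataset(X, label=y),
        num_boost_round=10,
    )

    # dynamic batch axis: None instead of a fixed row count
    initial_types = [("input", FloatTensorType([None, booster.num_feature()]))]
    onnx_model = convert_lightgbm(booster, initial_types=initial_types)

    sess = ort.InferenceSession(
        onnx_model.SerializeToString(), providers=["CPUExecutionProvider"]
    )
    input_name = sess.get_inputs()[0].name
    output_name = sess.get_outputs()[0].name

    # the same session now scores one row or the whole batch
    single_row = sess.run([output_name], {input_name: X[0:1]})[0]
    full_batch = sess.run([output_name], {input_name: X})[0]

One serialized model instead of two also halves the conversion and session-creation work, at the possible cost of shape-specific runtime optimizations.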
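
The timing code keeps its shape after the change: one discarded single-row run to warm the session up, then timeit over the batch run. Reusing sess, X, input_name, and output_name from the sketch above, per-row latency can be derived as follows (timeit_loops is an arbitrary illustrative value, not the benchmark's setting).

    import timeit

    # warm-up: the first run can pay one-time initialization costs, so discard it
    sess.run([output_name], {input_name: X[0:1]})

    timeit_loops = 10
    batch_seconds = timeit.timeit(
        lambda: sess.run([output_name], {input_name: X}),
        number=timeit_loops,
    ) / timeit_loops

    per_row_usecs = batch_seconds / len(X) * 1e6
    print(f"batch: {batch_seconds:.6f} s, per row: {per_row_usecs:.2f} us")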