From 84154e0a6377136072ff10f6e965bca061450cc1 Mon Sep 17 00:00:00 2001 From: David Majercak Date: Fri, 14 Oct 2022 17:11:35 -0700 Subject: [PATCH 01/20] WIP --- src/pipelines/azureml/lightgbm_inferencing.py | 11 + .../data_processing/generate_data/generate.py | 3 +- .../inferencing/lightgbm_ort/.amlignore | 4 + .../lightgbm_ort/default.dockerfile | 27 +++ src/scripts/inferencing/lightgbm_ort/score.py | 227 ++++++++++++++++++ .../lightgbm_ort/spec.additional_includes | 2 + .../inferencing/lightgbm_ort/spec.yaml | 54 +++++ 7 files changed, 327 insertions(+), 1 deletion(-) create mode 100644 src/scripts/inferencing/lightgbm_ort/.amlignore create mode 100644 src/scripts/inferencing/lightgbm_ort/default.dockerfile create mode 100644 src/scripts/inferencing/lightgbm_ort/score.py create mode 100644 src/scripts/inferencing/lightgbm_ort/spec.additional_includes create mode 100644 src/scripts/inferencing/lightgbm_ort/spec.yaml diff --git a/src/pipelines/azureml/lightgbm_inferencing.py b/src/pipelines/azureml/lightgbm_inferencing.py index b82a9690..f9cfc171 100644 --- a/src/pipelines/azureml/lightgbm_inferencing.py +++ b/src/pipelines/azureml/lightgbm_inferencing.py @@ -67,6 +67,7 @@ class lightgbm_inferencing_config: # pylint: disable=invalid-name lightgbm_python_score_module = Component.from_yaml(yaml_file=os.path.join(COMPONENTS_ROOT, "inferencing", "lightgbm_python", "spec.yaml")) lightgbm_c_api_score_module = Component.from_yaml(yaml_file=os.path.join(COMPONENTS_ROOT, "inferencing", "lightgbm_c_api", "spec.yaml")) lightgbm_ray_score_module = Component.from_yaml(yaml_file=os.path.join(COMPONENTS_ROOT, "inferencing", "lightgbm_ray", "spec.yaml")) +lightgbm_ort_score_module = Component.from_yaml(yaml_file=os.path.join(COMPONENTS_ROOT, "inferencing", "lightgbm_ort", "spec.yaml")) custom_win_cli_score_module = Component.from_yaml(yaml_file=os.path.join(COMPONENTS_ROOT, "inferencing", "custom_win_cli", "spec.yaml")) treelite_compile_module = Component.from_yaml(yaml_file=os.path.join(COMPONENTS_ROOT, "model_transformation", "treelite_compile", "spec.yaml")) treelite_score_module = Component.from_yaml(yaml_file=os.path.join(COMPONENTS_ROOT, "inferencing", "treelite_python", "spec.yaml")) @@ -176,6 +177,16 @@ def inferencing_task_pipeline_function(benchmark_custom_properties, ) inferencing_step.runsettings.configure(target=config.compute.linux_cpu) + elif variant.framework == "lightgbm_ort": + # call module with all the right arguments + inferencing_step = lightgbm_ort_score_module( + data = data, + model = model, + verbose = False, + custom_properties = custom_properties + ) + inferencing_step.runsettings.configure(target=config.compute.linux_cpu) + else: raise NotImplementedError(f"framework {variant.framework} not implemented (yet)") diff --git a/src/scripts/data_processing/generate_data/generate.py b/src/scripts/data_processing/generate_data/generate.py index ba5445d7..0f1a9163 100644 --- a/src/scripts/data_processing/generate_data/generate.py +++ b/src/scripts/data_processing/generate_data/generate.py @@ -252,7 +252,8 @@ def run(self, args, logger, metrics_logger, unknown_args): os.makedirs(args.output_train, exist_ok=True) os.makedirs(args.output_test, exist_ok=True) os.makedirs(args.output_inference, exist_ok=True) - os.makedirs(args.external_header, exist_ok=True) + if args.external_header: + os.makedirs(args.external_header, exist_ok=True) # transform delimiter diff --git a/src/scripts/inferencing/lightgbm_ort/.amlignore b/src/scripts/inferencing/lightgbm_ort/.amlignore new file mode 100644 
index 00000000..749ccdaf --- /dev/null +++ b/src/scripts/inferencing/lightgbm_ort/.amlignore @@ -0,0 +1,4 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class diff --git a/src/scripts/inferencing/lightgbm_ort/default.dockerfile b/src/scripts/inferencing/lightgbm_ort/default.dockerfile new file mode 100644 index 00000000..2b807f43 --- /dev/null +++ b/src/scripts/inferencing/lightgbm_ort/default.dockerfile @@ -0,0 +1,27 @@ +FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest +LABEL lightgbmbenchmark.linux.cpu.ray.version="0.1.2/20220111.1" + +ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm + +# Create conda environment +RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ + python=3.8 pip=20.2.4 + +# Prepend path to AzureML conda environment +ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH + +# Install pip dependencies +RUN HOROVOD_WITH_TENSORFLOW=1 \ + pip install 'pandas>=1.1,<1.2' \ + 'numpy>=1.10,<1.20' \ + 'matplotlib==3.4.3' \ + 'scipy~=1.5.0' \ + 'scikit-learn~=0.24.1' \ + 'azureml-core==1.35.0' \ + 'azureml-defaults==1.35.0' \ + 'azureml-mlflow==1.35.0' \ + 'azureml-telemetry==1.35.0' \ + 'mpi4py==3.1.1' \ + 'onnxruntime==1.12.1' \ + 'onnxmltools==1.11.1' \ + 'onnxconverter-common==1.12.2' diff --git a/src/scripts/inferencing/lightgbm_ort/score.py b/src/scripts/inferencing/lightgbm_ort/score.py new file mode 100644 index 00000000..23f00c7a --- /dev/null +++ b/src/scripts/inferencing/lightgbm_ort/score.py @@ -0,0 +1,227 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +""" +LightGBM/Python inferencing script +""" +import os +import sys +import argparse +import logging +import time +import timeit +import numpy as np +from distutils.util import strtobool +import lightgbm + +import onnxruntime as ort +from onnxmltools.convert import convert_lightgbm +from onnxconverter_common.data_types import FloatTensorType + +COMMON_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..")) + +if COMMON_ROOT not in sys.path: + logging.info(f"Adding {COMMON_ROOT} to PYTHONPATH") + sys.path.append(str(COMMON_ROOT)) + +# useful imports from common +from common.components import RunnableScript +from common.io import input_file_path + + +class LightGBMONNXRTInferecingScript(RunnableScript): + def __init__(self): + super().__init__( + task="score", + framework="lightgbm", + framework_version="ONNXRT." + str(ort.__version__), + ) + + @classmethod + def get_arg_parser(cls, parser=None): + """Adds component/module arguments to a given argument parser. 
+ + Args: + parser (argparse.ArgumentParser): an argument parser instance + + Returns: + ArgumentParser: the argument parser instance + + Notes: + if parser is None, creates a new parser instance + """ + # add generic arguments + parser = RunnableScript.get_arg_parser(parser) + + group_i = parser.add_argument_group(f"Input Data [{__name__}:{cls.__name__}]") + group_i.add_argument( + "--data", + required=True, + type=input_file_path, + help="Inferencing data location (file path)", + ) + group_i.add_argument( + "--data_format", + type=str, + choices=["CSV", "PARQUET", "PETASTORM"], + default="CSV", + ) + group_i.add_argument( + "--model", + required=False, + type=input_file_path, + help="Exported model location (file path)", + ) + group_i.add_argument( + "--output", + required=False, + default=None, + type=str, + help="Inferencing output location (file path)", + ) + + group_params = parser.add_argument_group( + f"Scoring parameters [{__name__}:{cls.__name__}]" + ) + group_params.add_argument( + "--num_threads", + required=False, + default=1, + type=int, + help="number of threads", + ) + group_params.add_argument( + "--predict_disable_shape_check", + required=False, + default=False, + type=strtobool, + help="See LightGBM documentation", + ) + + return parser + + def run(self, args, logger, metrics_logger, unknown_args): + """Run script with arguments (the core of the component) + + Args: + args (argparse.namespace): command line arguments provided to script + logger (logging.getLogger() for this script) + metrics_logger (common.metrics.MetricLogger) + unknown_args (list[str]): list of arguments not recognized during argparse + """ + # record relevant parameters + metrics_logger.log_parameters(num_threads=args.num_threads) + + # register logger for lightgbm logs + lightgbm.register_logger(logger) + + # make sure the output argument exists + if args.output: + os.makedirs(args.output, exist_ok=True) + args.output = os.path.join(args.output, "predictions.txt") + + logger.info(f"Loading model from {args.model}") + booster = lightgbm.Booster(model_file=args.model) + + logger.info(f"Loading data for inferencing") + assert args.data_format == "CSV" + with metrics_logger.log_time_block("time_data_loading"): + # NOTE: this is bad, but allows for libsvm format (not just numpy) + inference_data = lightgbm.Dataset( + args.data, free_raw_data=False + ).construct() + inference_raw_data = inference_data.get_data() + if type(inference_raw_data) == str: + inference_raw_data = np.loadtxt( + inference_raw_data, delimiter="," + ).astype(np.float32)[:, : inference_data.num_feature()] + + logger.info(f"Converting model to ONNX") + onnx_input_types = [ + ( + "input", + FloatTensorType( + [inference_data.num_data(), inference_data.num_feature()] + ), + ) + ] + onnx_ml_model = convert_lightgbm(booster, initial_types=onnx_input_types) + + logger.info(f"Creating inference session") + sessionml = ort.InferenceSession(onnx_ml_model.SerializeToString(), sess) + + # capture data shape as property + metrics_logger.set_properties( + inference_data_length=inference_data.num_data(), + inference_data_width=inference_data.num_feature(), + ) + + logger.info(f"Running .predict()") + + batch_start_time = time.monotonic() + sessionml.run( + [sessionml.get_outputs()[0].name], + {sessionml.get_inputs()[0].name: inference_raw_data}, + ) + + # onnxml_time = timeit.timeit( + # "sessionml.run( [sessionml.get_outputs()[0].name], {sessionml.get_inputs()[0].name: inference_raw_data} )", + # number=10, + # setup="from __main__ import 
sessionml, inference_raw_data", + # ) + onnxml_time = timeit.timeit( + lambda: sessionml.run( + [sessionml.get_outputs()[0].name], + {sessionml.get_inputs()[0].name: inference_raw_data}, + ), + number=10, + ) + print( + "LGBM->ONNXML (CPU): {}".format( + num_classes, max_depth, n_estimators, onnxml_time + ) + ) + + booster.num_trees + batch_start_time = time.monotonic() + predictions_array = booster.predict( + data=inference_raw_data, + num_threads=args.num_threads, + predict_disable_shape_check=bool(args.predict_disable_shape_check), + ) + prediction_time = time.monotonic() - batch_start_time + metrics_logger.log_metric("time_inferencing", prediction_time) + + # use helper to log latency with the right metric names + metrics_logger.log_inferencing_latencies( + [prediction_time], # only one big batch + batch_length=inference_data.num_data(), + factor_to_usecs=1000000.0, # values are in seconds + ) + + if args.output: + np.savetxt( + args.output, + predictions_array, + fmt="%f", + delimiter=",", + newline="\n", + header="", + footer="", + comments="# ", + encoding=None, + ) + + +def get_arg_parser(parser=None): + """To ensure compatibility with shrike unit tests""" + return LightGBMONNXRTInferecingScript.get_arg_parser(parser) + + +def main(cli_args=None): + """To ensure compatibility with shrike unit tests""" + LightGBMONNXRTInferecingScript.main(cli_args) + + +if __name__ == "__main__": + main() diff --git a/src/scripts/inferencing/lightgbm_ort/spec.additional_includes b/src/scripts/inferencing/lightgbm_ort/spec.additional_includes new file mode 100644 index 00000000..13e7552d --- /dev/null +++ b/src/scripts/inferencing/lightgbm_ort/spec.additional_includes @@ -0,0 +1,2 @@ +../../../common/ +../../../../docker/ diff --git a/src/scripts/inferencing/lightgbm_ort/spec.yaml b/src/scripts/inferencing/lightgbm_ort/spec.yaml new file mode 100644 index 00000000..b0ca1315 --- /dev/null +++ b/src/scripts/inferencing/lightgbm_ort/spec.yaml @@ -0,0 +1,54 @@ +$schema: http://azureml/sdk-2-0/CommandComponent.json +name: lightgbm_ort_score +version: 0.0.1 +display_name: "LightGBM Inferencing (ONNX RT)" +type: CommandComponent +description: "LightGBM inferencing using the ONNX Runtime." 
+is_deterministic: true +inputs: + data: + type: AnyDirectory + description: directory to the inference data + optional: false + model: + type: AnyDirectory + description: directory to the model + optional: false + predict_disable_shape_check: + type: Boolean + description: "control whether or not LightGBM raises an error when you try to predict on data with a different number of features than the training data" + default: False + n_threads: + type: Integer + optional: true + verbose: + type: Boolean + default: False + custom_properties: + type: String + description: additional custom tags for the job + optional: true + +outputs: + predictions: + type: AnyDirectory + +command: >- + python score.py + --data {inputs.data} + --model {inputs.model} + --output {outputs.predictions} + [--num_threads {inputs.n_threads}] + --predict_disable_shape_check {inputs.predict_disable_shape_check} + --verbose {inputs.verbose} + [--custom_properties {inputs.custom_properties}] + --cluster_auto_setup True + +environment: + docker: + build: + # file path is resolved after additional includes + dockerfile: file:./default.dockerfile + conda: + userManagedDependencies: true + os: Linux From b449733bd71ebb15edf6a2136bfe43157d457369 Mon Sep 17 00:00:00 2001 From: David Majercak Date: Mon, 17 Oct 2022 16:59:01 -0700 Subject: [PATCH 02/20] add onnx inference --- .../benchmarks/lightgbm-inferencing.yaml | 4 ++ conf/experiments/lightgbm-inferencing.yaml | 4 ++ .../lightgbm_ort/default.dockerfile | 6 +++ src/scripts/inferencing/lightgbm_ort/score.py | 40 +++++-------------- 4 files changed, 25 insertions(+), 29 deletions(-) diff --git a/conf/experiments/benchmarks/lightgbm-inferencing.yaml b/conf/experiments/benchmarks/lightgbm-inferencing.yaml index 188b7b0c..13874a6e 100644 --- a/conf/experiments/benchmarks/lightgbm-inferencing.yaml +++ b/conf/experiments/benchmarks/lightgbm-inferencing.yaml @@ -14,6 +14,9 @@ experiment: name: "lightgbm_inferencing_dev" description: "something interesting to say about this" +run: + submit: true + lightgbm_inferencing_config: # name of your particular benchmark benchmark_name: "benchmark-inferencing-20211216.1" # need to be provided at runtime! 
@@ -80,6 +83,7 @@ lightgbm_inferencing_config: - framework: lightgbm_c_api # v3.2.1 with C API prediction build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile - framework: lightgbm_ray # ray implementation + - framework: lightgbm_ort # ONNX RT implementation - framework: treelite_python # v1.3.0 # to use custom_win_cli, you need to compile your own binaries diff --git a/conf/experiments/lightgbm-inferencing.yaml b/conf/experiments/lightgbm-inferencing.yaml index 2e68c4d3..f70d2d22 100644 --- a/conf/experiments/lightgbm-inferencing.yaml +++ b/conf/experiments/lightgbm-inferencing.yaml @@ -14,6 +14,9 @@ experiment: name: "lightgbm_inferencing_dev" description: "something interesting to say about this" +run: + submit: true + lightgbm_inferencing_config: # name of your particular benchmark benchmark_name: "benchmark-dev" # override this with a unique name @@ -36,6 +39,7 @@ lightgbm_inferencing_config: - framework: lightgbm_c_api # v3.2.1 with C API prediction build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile - framework: lightgbm_ray # ray implementation + - framework: lightgbm_ort # ONNX RT implementation - framework: treelite_python # v1.3.0 # to use custom_win_cli, you need to compile your own binaries diff --git a/src/scripts/inferencing/lightgbm_ort/default.dockerfile b/src/scripts/inferencing/lightgbm_ort/default.dockerfile index 2b807f43..01e5614c 100644 --- a/src/scripts/inferencing/lightgbm_ort/default.dockerfile +++ b/src/scripts/inferencing/lightgbm_ort/default.dockerfile @@ -3,6 +3,8 @@ LABEL lightgbmbenchmark.linux.cpu.ray.version="0.1.2/20220111.1" ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm +ARG lightgbm_version="3.3.0" + # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ python=3.8 pip=20.2.4 @@ -25,3 +27,7 @@ RUN HOROVOD_WITH_TENSORFLOW=1 \ 'onnxruntime==1.12.1' \ 'onnxmltools==1.11.1' \ 'onnxconverter-common==1.12.2' + +# install lightgbm with mpi +RUN pip install lightgbm==${lightgbm_version} \ + pip install 'protobuf==3.20' \ No newline at end of file diff --git a/src/scripts/inferencing/lightgbm_ort/score.py b/src/scripts/inferencing/lightgbm_ort/score.py index 23f00c7a..371fec02 100644 --- a/src/scripts/inferencing/lightgbm_ort/score.py +++ b/src/scripts/inferencing/lightgbm_ort/score.py @@ -148,7 +148,7 @@ def run(self, args, logger, metrics_logger, unknown_args): onnx_ml_model = convert_lightgbm(booster, initial_types=onnx_input_types) logger.info(f"Creating inference session") - sessionml = ort.InferenceSession(onnx_ml_model.SerializeToString(), sess) + sessionml = ort.InferenceSession(onnx_ml_model.SerializeToString()) # capture data shape as property metrics_logger.set_properties( @@ -157,45 +157,27 @@ def run(self, args, logger, metrics_logger, unknown_args): ) logger.info(f"Running .predict()") - batch_start_time = time.monotonic() - sessionml.run( + predictions_array = sessionml.run( [sessionml.get_outputs()[0].name], {sessionml.get_inputs()[0].name: inference_raw_data}, - ) + )[0] + prediction_time = time.monotonic() - batch_start_time + metrics_logger.log_metric("time_inferencing", prediction_time) + # TODO: Discuss alternative? 
# onnxml_time = timeit.timeit( - # "sessionml.run( [sessionml.get_outputs()[0].name], {sessionml.get_inputs()[0].name: inference_raw_data} )", + # lambda: sessionml.run( + # [sessionml.get_outputs()[0].name], + # {sessionml.get_inputs()[0].name: inference_raw_data}, + # ), # number=10, - # setup="from __main__ import sessionml, inference_raw_data", # ) - onnxml_time = timeit.timeit( - lambda: sessionml.run( - [sessionml.get_outputs()[0].name], - {sessionml.get_inputs()[0].name: inference_raw_data}, - ), - number=10, - ) - print( - "LGBM->ONNXML (CPU): {}".format( - num_classes, max_depth, n_estimators, onnxml_time - ) - ) - - booster.num_trees - batch_start_time = time.monotonic() - predictions_array = booster.predict( - data=inference_raw_data, - num_threads=args.num_threads, - predict_disable_shape_check=bool(args.predict_disable_shape_check), - ) - prediction_time = time.monotonic() - batch_start_time - metrics_logger.log_metric("time_inferencing", prediction_time) # use helper to log latency with the right metric names metrics_logger.log_inferencing_latencies( [prediction_time], # only one big batch - batch_length=inference_data.num_data(), + batch_length=len(inference_raw_data), factor_to_usecs=1000000.0, # values are in seconds ) From ee19ce581fbf6a612f81b204cfb4c09b7da2b459 Mon Sep 17 00:00:00 2001 From: David Majercak Date: Mon, 17 Oct 2022 18:10:37 -0700 Subject: [PATCH 03/20] wip --- src/scripts/inferencing/lightgbm_ort/score.py | 51 +++++++++++++++---- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/src/scripts/inferencing/lightgbm_ort/score.py b/src/scripts/inferencing/lightgbm_ort/score.py index 371fec02..c4a4a1e4 100644 --- a/src/scripts/inferencing/lightgbm_ort/score.py +++ b/src/scripts/inferencing/lightgbm_ort/score.py @@ -90,6 +90,13 @@ def get_arg_parser(cls, parser=None): type=int, help="number of threads", ) + group_params.add_argument( + "--run_parallel", + required=False, + default=True, + type=bool, + help="number of threads", + ) group_params.add_argument( "--predict_disable_shape_check", required=False, @@ -140,15 +147,25 @@ def run(self, args, logger, metrics_logger, unknown_args): onnx_input_types = [ ( "input", - FloatTensorType( - [inference_data.num_data(), inference_data.num_feature()] - ), + FloatTensorType([1, inference_data.num_feature()]), ) ] onnx_ml_model = convert_lightgbm(booster, initial_types=onnx_input_types) logger.info(f"Creating inference session") - sessionml = ort.InferenceSession(onnx_ml_model.SerializeToString()) + sess_options = ort.SessionOptions() + # sess_options.intra_op_num_threads = args.num_threads + sess_options.execution_mode = ( + ort.ExecutionMode.ORT_PARALLEL + if args.run_parallel + else ort.ExecutionMode.ORT_SEQUENTIAL + ) + sess_options.graph_optimization_level = ( + ort.GraphOptimizationLevel.ORT_ENABLE_ALL + ) + sessionml = ort.InferenceSession( + onnx_ml_model.SerializeToString(), sess_options + ) # capture data shape as property metrics_logger.set_properties( @@ -156,14 +173,26 @@ def run(self, args, logger, metrics_logger, unknown_args): inference_data_width=inference_data.num_feature(), ) + # Warmup and compute results + for _ in range(100): + predictions_arraysessionml.run( + [sessionml.get_outputs()[0].name], + {sessionml.get_inputs()[0].name: inference_raw_data[0:1]}, + )[0] + logger.info(f"Running .predict()") - batch_start_time = time.monotonic() - predictions_array = sessionml.run( - [sessionml.get_outputs()[0].name], - {sessionml.get_inputs()[0].name: inference_raw_data}, - )[0] - prediction_time 
= time.monotonic() - batch_start_time - metrics_logger.log_metric("time_inferencing", prediction_time) + time_inferencing_per_query = [] + predictions_array = [] + for i in range(len(inference_raw_data)): + batch_start_time = time.monotonic() + prediction = sessionml.run( + [sessionml.get_outputs()[0].name], + {sessionml.get_inputs()[0].name: inference_raw_data[i : i + 1]}, + )[0] + prediction_time = time.monotonic() - batch_start_time + time_inferencing_per_query.append(prediction_time) + predictions_array.append(prediction) + metrics_logger.log_metric("time_inferencing", sum(prediction_time)) # TODO: Discuss alternative? # onnxml_time = timeit.timeit( From b0ea07c398619f8a13dba18e95113032a7fbaaee Mon Sep 17 00:00:00 2001 From: David Majercak Date: Tue, 18 Oct 2022 11:28:38 -0700 Subject: [PATCH 04/20] update onnx measurements technique --- .../lightgbm-inferencing-prod.yaml | 51 +++++++++++++ src/scripts/inferencing/lightgbm_ort/score.py | 76 +++++++++++++------ 2 files changed, 105 insertions(+), 22 deletions(-) create mode 100644 conf/experiments/lightgbm-inferencing-prod.yaml diff --git a/conf/experiments/lightgbm-inferencing-prod.yaml b/conf/experiments/lightgbm-inferencing-prod.yaml new file mode 100644 index 00000000..d2898f2d --- /dev/null +++ b/conf/experiments/lightgbm-inferencing-prod.yaml @@ -0,0 +1,51 @@ +# This experiment runs multiple variants of lightgbm inferencing + treelite +# on a given user-defined dataset and model +# +# to execute: +# > python src/pipelines/azureml/lightgbm_inferencing.py --exp-config conf/experiments/lightgbm-inferencing.yaml + +defaults: + - aml: custom_prod + - compute: custom_prod + +### CUSTOM PARAMETERS ### + +experiment: + name: "lightgbm_inferencing_prod" + description: "something interesting to say about this" + +run: + submit: true + +lightgbm_inferencing_config: + # name of your particular benchmark + benchmark_name: "benchmark-prod" # override this with a unique name + + # list all the data/model pairs to run inferencing with + tasks: + - data: + name: "NiR4_OFE_FR_NOFF_DATA" + model: + name: "NiR4_OFE_LGBM" + - data: + name: "NiR4_HRS_FR_NOFF_DATA" + model: + name: "NiR4_HRS_LGBM" + + # list all inferencing frameworks and their builds + variants: + # - framework: lightgbm_python # v3.3.0 via pypi + # - framework: lightgbm_c_api # v3.3.0 with C API prediction + # - framework: lightgbm_c_api # v3.3.0 with C API prediction + # build: docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile + # - framework: lightgbm_c_api # v3.2.1 with C API prediction + # build: docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile + # - framework: lightgbm_c_api # v3.2.1 with C API prediction + # build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile + # - framework: lightgbm_ray # ray implementation + - framework: lightgbm_ort # ONNX RT implementation + - framework: treelite_python # v1.3.0 + + # to use custom_win_cli, you need to compile your own binaries + # see src/scripts/inferencing/custom_win_cli/static_binaries/README.md + #- framework: custom_win_cli diff --git a/src/scripts/inferencing/lightgbm_ort/score.py b/src/scripts/inferencing/lightgbm_ort/score.py index c4a4a1e4..b67a8af2 100644 --- a/src/scripts/inferencing/lightgbm_ort/score.py +++ b/src/scripts/inferencing/lightgbm_ort/score.py @@ -128,7 +128,11 @@ def run(self, args, logger, metrics_logger, unknown_args): args.output = os.path.join(args.output, "predictions.txt") logger.info(f"Loading model from {args.model}") - booster = lightgbm.Booster(model_file=args.model) + # BUG: 
https://github.com/onnx/onnxmltools/issues/338 + with open(args.model, "r") as mf: + model_str = mf.read() + model_str = model_str.replace("objective=lambdarank", "objective=regression") + booster = lightgbm.Booster(model_str=model_str) logger.info(f"Loading data for inferencing") assert args.data_format == "CSV" @@ -150,7 +154,18 @@ def run(self, args, logger, metrics_logger, unknown_args): FloatTensorType([1, inference_data.num_feature()]), ) ] + onnx_batch_input_types = [ + ( + "input", + FloatTensorType( + [inference_data.num_data(), inference_data.num_feature()] + ), + ) + ] onnx_ml_model = convert_lightgbm(booster, initial_types=onnx_input_types) + onnx_ml_batch_model = convert_lightgbm( + booster, initial_types=onnx_batch_input_types + ) logger.info(f"Creating inference session") sess_options = ort.SessionOptions() @@ -166,6 +181,9 @@ def run(self, args, logger, metrics_logger, unknown_args): sessionml = ort.InferenceSession( onnx_ml_model.SerializeToString(), sess_options ) + sessionml_batch = ort.InferenceSession( + onnx_ml_batch_model.SerializeToString(), sess_options + ) # capture data shape as property metrics_logger.set_properties( @@ -173,39 +191,53 @@ def run(self, args, logger, metrics_logger, unknown_args): inference_data_width=inference_data.num_feature(), ) + logger.info(f"Running .predict()") + # Warmup and compute results for _ in range(100): - predictions_arraysessionml.run( + sessionml.run( [sessionml.get_outputs()[0].name], {sessionml.get_inputs()[0].name: inference_raw_data[0:1]}, )[0] + predictions_array = sessionml_batch.run( + [sessionml.get_outputs()[0].name], + {sessionml.get_inputs()[0].name: inference_raw_data}, + )[0] - logger.info(f"Running .predict()") time_inferencing_per_query = [] - predictions_array = [] - for i in range(len(inference_raw_data)): - batch_start_time = time.monotonic() - prediction = sessionml.run( + + timeit_loops = 10 + onnxml_batch_time = timeit.timeit( + lambda: sessionml_batch.run( [sessionml.get_outputs()[0].name], - {sessionml.get_inputs()[0].name: inference_raw_data[i : i + 1]}, - )[0] - prediction_time = time.monotonic() - batch_start_time + {sessionml.get_inputs()[0].name: inference_raw_data}, + ), + number=timeit_loops, + ) + onnxml_batch_time /= timeit_loops + + metrics_logger.log_metric("time_inferencing_batch", onnxml_batch_time) + + for i in range(len(inference_raw_data)): + prediction_time = timeit.timeit( + lambda: sessionml.run( + [sessionml.get_outputs()[0].name], + {sessionml.get_inputs()[0].name: inference_raw_data[i : i + 1]}, + ), + number=timeit_loops, + ) + prediction_time /= timeit_loops time_inferencing_per_query.append(prediction_time) - predictions_array.append(prediction) - metrics_logger.log_metric("time_inferencing", sum(prediction_time)) - - # TODO: Discuss alternative? 
- # onnxml_time = timeit.timeit( - # lambda: sessionml.run( - # [sessionml.get_outputs()[0].name], - # {sessionml.get_inputs()[0].name: inference_raw_data}, - # ), - # number=10, - # ) + metrics_logger.log_metric("time_inferencing", sum(time_inferencing_per_query)) # use helper to log latency with the right metric names metrics_logger.log_inferencing_latencies( - [prediction_time], # only one big batch + [onnxml_batch_time], # only one big batch + batch_length=len(inference_raw_data), + factor_to_usecs=1000000.0, # values are in seconds + ) + metrics_logger.log_inferencing_latencies( + time_inferencing_per_query, # only one big batch batch_length=len(inference_raw_data), factor_to_usecs=1000000.0, # values are in seconds ) From 611da1b271f45911ea4354749720d1f4913b15a6 Mon Sep 17 00:00:00 2001 From: David Majercak Date: Tue, 18 Oct 2022 16:42:52 -0700 Subject: [PATCH 05/20] wip --- .../benchmarks/lightgbm-inferencing.yaml | 3 +- requirements.txt | 42 ++++++++++--------- src/pipelines/azureml/lightgbm_inferencing.py | 11 +++++ src/scripts/inferencing/lightgbm_ort/score.py | 34 ++++++--------- .../inferencing/lightgbm_ort/spec.yaml | 6 ++- 5 files changed, 54 insertions(+), 42 deletions(-) diff --git a/conf/experiments/benchmarks/lightgbm-inferencing.yaml b/conf/experiments/benchmarks/lightgbm-inferencing.yaml index 13874a6e..18bd852e 100644 --- a/conf/experiments/benchmarks/lightgbm-inferencing.yaml +++ b/conf/experiments/benchmarks/lightgbm-inferencing.yaml @@ -82,8 +82,9 @@ lightgbm_inferencing_config: build: docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile - framework: lightgbm_c_api # v3.2.1 with C API prediction build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile - - framework: lightgbm_ray # ray implementation + # - framework: lightgbm_ray # ray implementation - framework: lightgbm_ort # ONNX RT implementation + - framework: lightgbm_ort_multithread # ONNX RT multithreaded implementation - framework: treelite_python # v1.3.0 # to use custom_win_cli, you need to compile your own binaries diff --git a/requirements.txt b/requirements.txt index 9b4b581f..226c6181 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,28 +1,32 @@ # benchmark common code -mlflow==1.23.1 -omegaconf~=2.1 -mpi4py==3.1.1 -matplotlib==3.4.3 -psutil==5.8.0 +mlflow==1.29.0 +omegaconf==2.1.2 +mpi4py==3.1.3 +matplotlib==3.6.1 +psutil==5.9.3 # frameworks -ray==1.9.2 -lightgbm-ray==0.1.2 -lightgbm==3.3.1 -treelite==2.1.0 -treelite_runtime==2.1.0 -flaml==0.9.6 +ray==2.0.0 +lightgbm-ray==0.1.7 +lightgbm==3.3.3 +treelite==3.0.0 +treelite-runtime==3.0.0 +FLAML==1.0.13 hpbandster==0.7.4 -ConfigSpace==0.5.0 -optuna==2.8.0 +ConfigSpace==0.6.0 +optuna==3.0.3 # pipelines -shrike[pipeline]==1.14.7 -azure-ml-component==0.9.4.post1 # for component dsl -azureml-train-core==1.36.0 # for azureml.train.hyperdrive -azureml-dataset-runtime==1.36.0 # to register dataset -hydra-core~=1.0.3 -typing_extensions==4.0.1 # for hydra +shrike[pipeline]==1.31.10 +azure-core==1.26.0 +azure-storage-blob==12.13.0 +azure-ml-component==0.9.13.post1 +azureml-train-core==1.37.0 +azureml-dataset-runtime==1.46.0 +hydra-core==1.0.4 +typing_extensions==4.4.0 +azureml-mlflow==1.46.0 +mlflow-skinny==1.29.0 # unit testing pytest==6.2.4 diff --git a/src/pipelines/azureml/lightgbm_inferencing.py b/src/pipelines/azureml/lightgbm_inferencing.py index f9cfc171..4a81fe43 100644 --- a/src/pipelines/azureml/lightgbm_inferencing.py +++ b/src/pipelines/azureml/lightgbm_inferencing.py @@ -187,6 +187,17 @@ def 
inferencing_task_pipeline_function(benchmark_custom_properties, ) inferencing_step.runsettings.configure(target=config.compute.linux_cpu) + elif variant.framework == "lightgbm_ort_multithread": + # call module with all the right arguments + inferencing_step = lightgbm_ort_score_module( + data = data, + model = model, + verbose = False, + run_parallel = True, + custom_properties = custom_properties + ) + inferencing_step.runsettings.configure(target=config.compute.linux_cpu) + else: raise NotImplementedError(f"framework {variant.framework} not implemented (yet)") diff --git a/src/scripts/inferencing/lightgbm_ort/score.py b/src/scripts/inferencing/lightgbm_ort/score.py index b67a8af2..5ee427e9 100644 --- a/src/scripts/inferencing/lightgbm_ort/score.py +++ b/src/scripts/inferencing/lightgbm_ort/score.py @@ -86,14 +86,14 @@ def get_arg_parser(cls, parser=None): group_params.add_argument( "--num_threads", required=False, - default=1, + default=0, type=int, help="number of threads", ) group_params.add_argument( "--run_parallel", required=False, - default=True, + default=False, type=bool, help="number of threads", ) @@ -131,7 +131,9 @@ def run(self, args, logger, metrics_logger, unknown_args): # BUG: https://github.com/onnx/onnxmltools/issues/338 with open(args.model, "r") as mf: model_str = mf.read() - model_str = model_str.replace("objective=lambdarank", "objective=regression") + model_str = model_str.replace( + "objective=lambdarank", "objective=regression" + ) booster = lightgbm.Booster(model_str=model_str) logger.info(f"Loading data for inferencing") @@ -169,7 +171,14 @@ def run(self, args, logger, metrics_logger, unknown_args): logger.info(f"Creating inference session") sess_options = ort.SessionOptions() - # sess_options.intra_op_num_threads = args.num_threads + + if args.num_threads > 0: + logger.info(f"Setting number of threads to {args.num_threads}") + sess_options.intra_op_num_threads = args.num_threads + + if args.run_parallel: + logger.info(f"Creating multithreaded inference session") + sess_options.execution_mode = ( ort.ExecutionMode.ORT_PARALLEL if args.run_parallel @@ -205,19 +214,7 @@ def run(self, args, logger, metrics_logger, unknown_args): )[0] time_inferencing_per_query = [] - timeit_loops = 10 - onnxml_batch_time = timeit.timeit( - lambda: sessionml_batch.run( - [sessionml.get_outputs()[0].name], - {sessionml.get_inputs()[0].name: inference_raw_data}, - ), - number=timeit_loops, - ) - onnxml_batch_time /= timeit_loops - - metrics_logger.log_metric("time_inferencing_batch", onnxml_batch_time) - for i in range(len(inference_raw_data)): prediction_time = timeit.timeit( lambda: sessionml.run( @@ -231,11 +228,6 @@ def run(self, args, logger, metrics_logger, unknown_args): metrics_logger.log_metric("time_inferencing", sum(time_inferencing_per_query)) # use helper to log latency with the right metric names - metrics_logger.log_inferencing_latencies( - [onnxml_batch_time], # only one big batch - batch_length=len(inference_raw_data), - factor_to_usecs=1000000.0, # values are in seconds - ) metrics_logger.log_inferencing_latencies( time_inferencing_per_query, # only one big batch batch_length=len(inference_raw_data), diff --git a/src/scripts/inferencing/lightgbm_ort/spec.yaml b/src/scripts/inferencing/lightgbm_ort/spec.yaml index b0ca1315..f97e94ce 100644 --- a/src/scripts/inferencing/lightgbm_ort/spec.yaml +++ b/src/scripts/inferencing/lightgbm_ort/spec.yaml @@ -1,6 +1,6 @@ $schema: http://azureml/sdk-2-0/CommandComponent.json name: lightgbm_ort_score -version: 0.0.1 +version: 0.0.2 
display_name: "LightGBM Inferencing (ONNX RT)" type: CommandComponent description: "LightGBM inferencing using the ONNX Runtime." @@ -21,6 +21,9 @@ inputs: n_threads: type: Integer optional: true + run_parallel: + type: Boolean + optional: true verbose: type: Boolean default: False @@ -39,6 +42,7 @@ command: >- --model {inputs.model} --output {outputs.predictions} [--num_threads {inputs.n_threads}] + [--run_parallel {inputs.run_parallel}] --predict_disable_shape_check {inputs.predict_disable_shape_check} --verbose {inputs.verbose} [--custom_properties {inputs.custom_properties}] From 37227f6b072ce5bd980da8f5b4f9929481a0885a Mon Sep 17 00:00:00 2001 From: David Majercak Date: Wed, 19 Oct 2022 14:30:44 -0700 Subject: [PATCH 06/20] update docs with results for onnx --- .../benchmarks/lightgbm-inferencing.yaml | 2 + docs/results/inferencing.md | 178 +++++++++++------- requirements.txt | 3 + src/pipelines/azureml/lightgbm_inferencing.py | 23 +++ src/scripts/analysis/analyze.py | 74 +++++--- src/scripts/inferencing/lightgbm_ort/score.py | 50 ++++- .../inferencing/lightgbm_ort/spec.yaml | 6 +- 7 files changed, 230 insertions(+), 106 deletions(-) diff --git a/conf/experiments/benchmarks/lightgbm-inferencing.yaml b/conf/experiments/benchmarks/lightgbm-inferencing.yaml index 18bd852e..262964d8 100644 --- a/conf/experiments/benchmarks/lightgbm-inferencing.yaml +++ b/conf/experiments/benchmarks/lightgbm-inferencing.yaml @@ -84,7 +84,9 @@ lightgbm_inferencing_config: build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile # - framework: lightgbm_ray # ray implementation - framework: lightgbm_ort # ONNX RT implementation + - framework: lightgbm_ort_batch # ONNX RT implementation - framework: lightgbm_ort_multithread # ONNX RT multithreaded implementation + - framework: lightgbm_ort_multithread_batch # ONNX RT multithreaded implementation - framework: treelite_python # v1.3.0 # to use custom_win_cli, you need to compile your own binaries diff --git a/docs/results/inferencing.md b/docs/results/inferencing.md index 8395ab49..b9f6242d 100644 --- a/docs/results/inferencing.md +++ b/docs/results/inferencing.md @@ -8,31 +8,35 @@ ## Variants -| variant_id | index | framework | version | build | cpu count | num threads | machine | system | -|:------------------|--------:|:----------------|:-----------------|:-----------------------------------------------------------|------------:|--------------:|:----------|:---------| -| lightgbm#0 | 0 | lightgbm | PYTHON_API.3.3.0 | default | 16 | 1 | x86_64 | Linux | -| lightgbm#1 | 1 | lightgbm | C_API.3.3.0 | default | 16 | 1 | x86_64 | Linux | -| lightgbm#2 | 2 | lightgbm | C_API.3.3.0 | docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile | 16 | 1 | x86_64 | Linux | -| lightgbm#3 | 3 | lightgbm | C_API.3.2.1 | docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile | 16 | 1 | x86_64 | Linux | -| lightgbm#4 | 4 | lightgbm | C_API.3.2.1 | docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile | 16 | 1 | x86_64 | Linux | -| treelite_python#5 | 5 | treelite_python | 1.3.0 | default | 16 | 1 | x86_64 | Linux | +| variant_id | index | framework | version | build | cpu count | num threads | machine | system | +|:---------------------------|--------:|:-------------------------|:-----------------|:-----------------------------------------------------------|------------:|--------------:|:----------|:---------| +| lightgbm#0 | 0 | lightgbm | PYTHON_API.3.3.0 | default | 16 | 1 | x86_64 | Linux | +| lightgbm#1 | 1 | lightgbm | C_API.3.3.0 | default | 16 | 1 | 
x86_64 | Linux | +| lightgbm#2 | 2 | lightgbm | C_API.3.3.0 | docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile | 16 | 1 | x86_64 | Linux | +| lightgbm#3 | 3 | lightgbm | C_API.3.2.1 | docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile | 16 | 1 | x86_64 | Linux | +| lightgbm#4 | 4 | lightgbm | C_API.3.2.1 | docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile | 16 | 1 | x86_64 | Linux | +| onnx#5 | 5 | onnx | ONNXRT.1.12.1 | default | 16 | 1 | x86_64 | Linux | +| onnx_batch#6 | 6 | onnx_batch | ONNXRT.1.12.1 | default | 16 | 1 | x86_64 | Linux | +| onnx_multithreaded#7 | 7 | onnx_multithreaded | ONNXRT.1.12.1 | default | 16 | - | x86_64 | Linux | +| onnx_multithreaded_batch#8 | 8 | onnx_multithreaded_batch | ONNXRT.1.12.1 | default | 16 | - | x86_64 | Linux | +| treelite_python#9 | 9 | treelite_python | 2.1.0 | default | 16 | 1 | x86_64 | Linux | ## Metric time_inferencing per prediction (usecs) -| inferencing task config | lightgbm#0 | lightgbm#1 | lightgbm#2 | lightgbm#3 | lightgbm#4 | treelite_python#5 | -|:---------------------------------------|-------------:|-------------:|-------------:|-------------:|-------------:|--------------------:| -| 10 trees
31 leaves<br/>10 cols | 6.71442 | 1.27191 | 1.88084 | 1.97014 | 1.50457 | 0.299835 |
-| 10 trees<br/>31 leaves<br/>100 cols | 10.0109 | 1.87281 | 1.89273 | 1.51227 | 1.93901 | 0.465536 |
-| 10 trees<br/>31 leaves<br/>1000 cols | 37.308 | 4.32708 | 4.70362 | 7.06888 | 4.72284 | 2.08173 |
-| 100 trees<br/>31 leaves<br/>10 cols | 18.8272 | 12.7087 | 14.9646 | 10.8278 | 16.6011 | 5.27241 |
-| 100 trees<br/>31 leaves<br/>100 cols | 23.524 | 9.6317 | 11.2825 | 15.0675 | 13.3228 | 7.3904 |
-| 100 trees<br/>31 leaves<br/>1000 cols | 45.8476 | 14.3042 | 18.5159 | 15.6538 | 14.9914 | 7.93605 |
-| 1000 trees<br/>31 leaves<br/>10 cols | 113.854 | 95.4644 | 104.575 | 93.1975 | 107.137 | 28.5369 |
-| 1000 trees<br/>31 leaves<br/>100 cols | 173.506 | 136.601 | 137.953 | 137.349 | 165.446 | 96.1941 |
-| 1000 trees<br/>31 leaves<br/>1000 cols | 178.49 | 143.14 | 143.734 | 146.814 | 149.186 | 98.9669 |
-| 5000 trees<br/>31 leaves<br/>10 cols | 395.046 | 394.296 | 425.493 | 326.193 | 443.607 | 251.199 |
-| 5000 trees<br/>31 leaves<br/>100 cols | 467.79 | 459.998 | 535.714 | 537.431 | 450.346 | 295.183 |
-| 5000 trees<br/>31 leaves
1000 cols | 645.185 | 580.791 | 574.005 | 643.234 | 591.006 | 442.544 | +| inferencing task config | lightgbm#0 | lightgbm#1 | lightgbm#2 | lightgbm#3 | lightgbm#4 | onnx#5 | onnx_batch#6 | onnx_multithreaded#7 | onnx_multithreaded_batch#8 | treelite_python#9 | +|:---------------------------------------|-------------:|-------------:|-------------:|-------------:|-------------:|----------:|---------------:|-----------------------:|-----------------------------:|--------------------:| +| 10 trees
31 leaves<br/>10 cols | 6.95305 | 1.11553 | 1.19408 | 1.15504 | 1.12653 | 7.62398 | 0.0969134 | 21.4563 | 0.198045 | 0.303221 |
+| 10 trees<br/>31 leaves<br/>100 cols | 9.9608 | 1.57071 | 1.81644 | 1.55628 | 1.73756 | 7.67336 | 0.149622 | 22.5913 | 0.303975 | 0.449347 |
+| 10 trees<br/>31 leaves<br/>1000 cols | 36.8206 | 3.97296 | 4.00286 | 4.35525 | 4.56862 | 7.65319 | 1.23701 | 21.9663 | 1.03079 | 1.90513 |
+| 100 trees<br/>31 leaves<br/>10 cols | 16.081 | 10.3246 | 11.2351 | 10.4623 | 10.411 | 12.9457 | 0.489068 | 27.8963 | 0.518232 | 5.12872 |
+| 100 trees<br/>31 leaves<br/>100 cols | 18.419 | 10.2733 | 9.27452 | 10.6115 | 10.4095 | 13.1084 | 0.691856 | 26.6879 | 0.637577 | 5.73254 |
+| 100 trees<br/>31 leaves<br/>1000 cols | 45.0129 | 12.6701 | 11.4707 | 12.7013 | 12.794 | 11.9506 | 2.29946 | 28.9509 | 1.98307 | 7.35011 |
+| 1000 trees<br/>31 leaves<br/>10 cols | 97.3209 | 97.622 | 103.892 | 95.7561 | 97.6808 | 18.3931 | 3.95854 | 40.0455 | 4.24206 | 33.3337 |
+| 1000 trees<br/>31 leaves<br/>100 cols | 154.284 | 146.32 | 154.788 | 149.401 | 149.942 | 20.4271 | 5.15573 | 40.3441 | 4.93979 | 96.6871 |
+| 1000 trees<br/>31 leaves<br/>1000 cols | 165.235 | 140.012 | 150.223 | 143.748 | 141.769 | 20.1743 | 11.7819 | 36.897 | 12.1277 | 101.73 |
+| 5000 trees<br/>31 leaves<br/>10 cols | 376.015 | 407.244 | 373.407 | 366.11 | 383.453 | 43.7589 | 10.8586 | 85.8648 | 10.1721 | 219.653 |
+| 5000 trees<br/>31 leaves<br/>100 cols | 421.179 | 465.234 | 482.583 | 468.308 | 473.928 | 104.56 | 24.15 | 156.015 | 24.2661 | 300.779 |
+| 5000 trees<br/>31 leaves
1000 cols | 644.905 | 587.578 | 581.033 | 625.28 | 598.814 | 94.8404 | 58.758 | 127.584 | 58.3206 | 416.228 | ## Percentile metrics for each variant @@ -43,66 +47,100 @@ Some variants above report percentile metrics. Those are reported by computing i | inferencing task config | p50_usecs | p90_usecs | p99_usecs | |:---------------------------------------|------------:|------------:|------------:| -| 10 trees
31 leaves<br/>10 cols | 1.3 | 1.5 | 1.6 |
-| 10 trees<br/>31 leaves<br/>100 cols | 1.8 | 2 | 3.1 |
-| 10 trees<br/>31 leaves<br/>1000 cols | 4.201 | 4.5 | 5.6 |
-| 100 trees<br/>31 leaves<br/>10 cols | 12.6 | 13.8 | 19.1 |
-| 100 trees<br/>31 leaves<br/>100 cols | 9.501 | 10 | 12.802 |
-| 100 trees<br/>31 leaves<br/>1000 cols | 14.301 | 15.601 | 25.001 |
-| 1000 trees<br/>31 leaves<br/>10 cols | 95.1015 | 98.801 | 108.803 |
-| 1000 trees<br/>31 leaves<br/>100 cols | 131.001 | 145.6 | 215.101 |
-| 1000 trees<br/>31 leaves<br/>1000 cols | 142.601 | 145.202 | 157.302 |
-| 5000 trees<br/>31 leaves<br/>10 cols | 383.404 | 430.905 | 584.61 |
-| 5000 trees<br/>31 leaves<br/>100 cols | 448.404 | 504.305 | 633.407 |
-| 5000 trees<br/>31 leaves<br/>1000 cols | 557.003 | 640.203 | 836.145 |
+| 10 trees<br/>31 leaves<br/>10 cols | 1.1 | 1.2 | 1.399 |
+| 10 trees<br/>31 leaves<br/>100 cols | 1.6 | 1.7 | 1.9 |
+| 10 trees<br/>31 leaves<br/>1000 cols | 3.9 | 4.2 | 4.5 |
+| 100 trees<br/>31 leaves<br/>10 cols | 10.3 | 11 | 11.601 |
+| 100 trees<br/>31 leaves<br/>100 cols | 10.2 | 10.7 | 11.1 |
+| 100 trees<br/>31 leaves<br/>1000 cols | 12.601 | 13.001 | 13.6 |
+| 1000 trees<br/>31 leaves<br/>10 cols | 96 | 102.001 | 114.201 |
+| 1000 trees<br/>31 leaves<br/>100 cols | 145.899 | 150.599 | 161.099 |
+| 1000 trees<br/>31 leaves<br/>1000 cols | 139.124 | 142.024 | 154.528 |
+| 5000 trees<br/>31 leaves<br/>10 cols | 405.801 | 424.302 | 444.202 |
+| 5000 trees<br/>31 leaves<br/>100 cols | 464.302 | 476.601 | 490.101 |
+| 5000 trees<br/>31 leaves
1000 cols | 585.368 | 600.169 | 611.8 | ### lightgbm#2 | inferencing task config | p50_usecs | p90_usecs | p99_usecs | |:---------------------------------------|------------:|------------:|------------:| -| 10 trees
31 leaves<br/>10 cols | 1.8 | 2.1 | 2.601 |
-| 10 trees<br/>31 leaves<br/>100 cols | 1.9 | 2 | 2.10001 |
-| 10 trees<br/>31 leaves<br/>1000 cols | 4.7 | 4.901 | 5.4 |
-| 100 trees<br/>31 leaves<br/>10 cols | 13.7 | 15.4 | 37.204 |
-| 100 trees<br/>31 leaves<br/>100 cols | 10.8 | 12.901 | 17.301 |
-| 100 trees<br/>31 leaves<br/>1000 cols | 17.7 | 19.001 | 31.4 |
-| 1000 trees<br/>31 leaves<br/>10 cols | 104.003 | 108.703 | 122.603 |
-| 1000 trees<br/>31 leaves<br/>100 cols | 132.501 | 149.701 | 221.015 |
-| 1000 trees<br/>31 leaves<br/>1000 cols | 138.702 | 160.802 | 219.107 |
-| 5000 trees<br/>31 leaves<br/>10 cols | 425.024 | 463.626 | 496.927 |
-| 5000 trees<br/>31 leaves<br/>100 cols | 508.705 | 588.917 | 946.39 |
-| 5000 trees<br/>31 leaves<br/>1000 cols | 550.905 | 624.606 | 810.269 |
+| 10 trees<br/>31 leaves<br/>10 cols | 1.2 | 1.3 | 1.5 |
+| 10 trees<br/>31 leaves<br/>100 cols | 1.8 | 1.9 | 2.1 |
+| 10 trees<br/>31 leaves<br/>1000 cols | 3.9 | 4.2 | 4.8 |
+| 100 trees<br/>31 leaves<br/>10 cols | 11.1 | 12 | 13.8 |
+| 100 trees<br/>31 leaves<br/>100 cols | 9.3 | 9.601 | 10 |
+| 100 trees<br/>31 leaves<br/>1000 cols | 11.399 | 11.799 | 13.401 |
+| 1000 trees<br/>31 leaves<br/>10 cols | 103.501 | 108.1 | 116.9 |
+| 1000 trees<br/>31 leaves<br/>100 cols | 154.296 | 159.296 | 170.495 |
+| 1000 trees<br/>31 leaves<br/>1000 cols | 149.602 | 152.301 | 164.802 |
+| 5000 trees<br/>31 leaves<br/>10 cols | 372.405 | 389.205 | 405.207 |
+| 5000 trees<br/>31 leaves<br/>100 cols | 481.504 | 496.705 | 510.607 |
+| 5000 trees<br/>31 leaves
1000 cols | 578.888 | 596.699 | 618.387 | ### lightgbm#3 | inferencing task config | p50_usecs | p90_usecs | p99_usecs | |:---------------------------------------|------------:|------------:|------------:| -| 10 trees
31 leaves<br/>10 cols | 1.8 | 2.3 | 3.1 |
-| 10 trees<br/>31 leaves<br/>100 cols | 1.5 | 1.6 | 1.9 |
-| 10 trees<br/>31 leaves<br/>1000 cols | 6.3 | 7.2 | 23.901 |
-| 100 trees<br/>31 leaves<br/>10 cols | 10.8 | 11.6 | 12.6 |
-| 100 trees<br/>31 leaves<br/>100 cols | 14.3 | 15.7 | 29.903 |
-| 100 trees<br/>31 leaves<br/>1000 cols | 15.1 | 16.2 | 27.201 |
-| 1000 trees<br/>31 leaves<br/>10 cols | 85.301 | 109.901 | 168.301 |
-| 1000 trees<br/>31 leaves<br/>100 cols | 132.401 | 149.601 | 201.402 |
-| 1000 trees<br/>31 leaves<br/>1000 cols | 146.202 | 148.903 | 161.503 |
-| 5000 trees<br/>31 leaves<br/>10 cols | 312.703 | 354.715 | 505.311 |
-| 5000 trees<br/>31 leaves<br/>100 cols | 537.638 | 582.651 | 608.343 |
-| 5000 trees<br/>31 leaves<br/>1000 cols | 641.307 | 654.907 | 667.409 |
+| 10 trees<br/>31 leaves<br/>10 cols | 1.1 | 1.3 | 1.5 |
+| 10 trees<br/>31 leaves<br/>100 cols | 1.5 | 1.7 | 1.9 |
+| 10 trees<br/>31 leaves<br/>1000 cols | 4.3 | 4.5 | 4.9 |
+| 100 trees<br/>31 leaves<br/>10 cols | 10.401 | 11.2 | 11.8 |
+| 100 trees<br/>31 leaves<br/>100 cols | 10.601 | 11.001 | 11.401 |
+| 100 trees<br/>31 leaves<br/>1000 cols | 12.601 | 13 | 13.6 |
+| 1000 trees<br/>31 leaves<br/>10 cols | 95.5 | 99 | 108.8 |
+| 1000 trees<br/>31 leaves<br/>100 cols | 149 | 153.8 | 164.202 |
+| 1000 trees<br/>31 leaves<br/>1000 cols | 142.699 | 145.799 | 158.899 |
+| 5000 trees<br/>31 leaves<br/>10 cols | 363.53 | 384.032 | 427.939 |
+| 5000 trees<br/>31 leaves<br/>100 cols | 466.461 | 479.863 | 501.27 |
+| 5000 trees<br/>31 leaves
1000 cols | 622.902 | 637.601 | 650.101 | ### lightgbm#4 | inferencing task config | p50_usecs | p90_usecs | p99_usecs | |:---------------------------------------|------------:|------------:|------------:| -| 10 trees
31 leaves<br/>10 cols | 1.3 | 1.7 | 2.7 |
-| 10 trees<br/>31 leaves<br/>100 cols | 1.8 | 2.2 | 2.6 |
-| 10 trees<br/>31 leaves<br/>1000 cols | 4.7 | 4.9 | 5.3 |
-| 100 trees<br/>31 leaves<br/>10 cols | 15.7 | 17.2 | 34.9 |
-| 100 trees<br/>31 leaves<br/>100 cols | 12.201 | 13.501 | 48.706 |
-| 100 trees<br/>31 leaves<br/>1000 cols | 14.901 | 16.101 | 24.701 |
-| 1000 trees<br/>31 leaves<br/>10 cols | 97.301 | 136.401 | 201.902 |
-| 1000 trees<br/>31 leaves<br/>100 cols | 164.901 | 170.101 | 182.801 |
-| 1000 trees<br/>31 leaves<br/>1000 cols | 148.403 | 151.003 | 166.205 |
-| 5000 trees<br/>31 leaves<br/>10 cols | 439.327 | 492.54 | 602.444 |
-| 5000 trees<br/>31 leaves<br/>100 cols | 439.432 | 490.245 | 605.846 |
-| 5000 trees<br/>31 leaves<br/>1000 cols | 571.902 | 640.112 | 827.614 |
+| 10 trees<br/>31 leaves<br/>10 cols | 1.1 | 1.3 | 1.4 |
+| 10 trees<br/>31 leaves<br/>100 cols | 1.7 | 1.9 | 2 |
+| 10 trees<br/>31 leaves<br/>1000 cols | 4.5 | 4.8 | 5.2 |
+| 100 trees<br/>31 leaves<br/>10 cols | 10.4 | 11.1 | 11.9 |
+| 100 trees<br/>31 leaves<br/>100 cols | 10.4 | 10.8 | 11.3 |
+| 100 trees<br/>31 leaves<br/>1000 cols | 12.798 | 13.099 | 13.598 |
+| 1000 trees<br/>31 leaves<br/>10 cols | 97.302 | 101.201 | 111.002 |
+| 1000 trees<br/>31 leaves<br/>100 cols | 149.489 | 154.29 | 165.188 |
+| 1000 trees<br/>31 leaves<br/>1000 cols | 141.2 | 143.601 | 156.5 |
+| 5000 trees<br/>31 leaves<br/>10 cols | 382.303 | 398.402 | 413.602 |
+| 5000 trees<br/>31 leaves<br/>100 cols | 472.51 | 485.21 | 499.01 |
+| 5000 trees<br/>31 leaves
1000 cols | 596.097 | 611.307 | 625.896 | + +### onnx#5 + +| inferencing task config | p50_usecs | p90_usecs | p99_usecs | +|:---------------------------------------|------------:|------------:|------------:| +| 10 trees
31 leaves<br/>10 cols | 7.51 | 7.6 | 8.88 |
+| 10 trees<br/>31 leaves<br/>100 cols | 7.5998 | 7.6798 | 8.8698 |
+| 10 trees<br/>31 leaves<br/>1000 cols | 7.59 | 7.6901 | 8.91 |
+| 100 trees<br/>31 leaves<br/>10 cols | 12.85 | 13.09 | 14.6201 |
+| 100 trees<br/>31 leaves<br/>100 cols | 12.9402 | 13.6202 | 14.7802 |
+| 100 trees<br/>31 leaves<br/>1000 cols | 11.8401 | 12.09 | 13.4901 |
+| 1000 trees<br/>31 leaves<br/>10 cols | 18.0601 | 19.2001 | 21.3902 |
+| 1000 trees<br/>31 leaves<br/>100 cols | 20.1093 | 21.0993 | 23.7093 |
+| 1000 trees<br/>31 leaves<br/>1000 cols | 19.6325 | 21.1828 | 23.7534 |
+| 5000 trees<br/>31 leaves<br/>10 cols | 43.3894 | 45.2993 | 47.5894 |
+| 5000 trees<br/>31 leaves<br/>100 cols | 104.27 | 111.281 | 118.342 |
+| 5000 trees<br/>31 leaves
1000 cols | 94.5217 | 97.9918 | 101.332 | + +### onnx_multithreaded#7 + +| inferencing task config | p50_usecs | p90_usecs | p99_usecs | +|:---------------------------------------|------------:|------------:|------------:| +| 10 trees
31 leaves<br/>10 cols | 21.8309 | 22.6609 | 24.3009 |
+| 10 trees<br/>31 leaves<br/>100 cols | 21.985 | 23.721 | 45.6108 |
+| 10 trees<br/>31 leaves<br/>1000 cols | 22.2599 | 23.7498 | 25.5398 |
+| 100 trees<br/>31 leaves<br/>10 cols | 26.3017 | 29.4928 | 42.4632 |
+| 100 trees<br/>31 leaves<br/>100 cols | 25.7001 | 28.0201 | 42.1234 |
+| 100 trees<br/>31 leaves<br/>1000 cols | 27.34 | 29.691 | 38.3805 |
+| 1000 trees<br/>31 leaves<br/>10 cols | 36.7701 | 40.1901 | 105.035 |
+| 1000 trees<br/>31 leaves<br/>100 cols | 36.8403 | 39.6914 | 109.843 |
+| 1000 trees<br/>31 leaves<br/>1000 cols | 33.7296 | 36.1596 | 68.4439 |
+| 5000 trees<br/>31 leaves<br/>10 cols | 72.6305 | 91.5047 | 400.135 |
+| 5000 trees<br/>31 leaves<br/>100 cols | 122.421 | 173.977 | 828.446 |
+| 5000 trees<br/>31 leaves
1000 cols | 101.62 | 130.733 | 732.035 | \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 226c6181..e0d7e00c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,6 +8,9 @@ psutil==5.9.3 # frameworks ray==2.0.0 lightgbm-ray==0.1.7 +onnxruntime==1.12.1 +onnxmltools==1.11.1 +onnxconverter-common==1.12.2 lightgbm==3.3.3 treelite==3.0.0 treelite-runtime==3.0.0 diff --git a/src/pipelines/azureml/lightgbm_inferencing.py b/src/pipelines/azureml/lightgbm_inferencing.py index 4a81fe43..9da7934d 100644 --- a/src/pipelines/azureml/lightgbm_inferencing.py +++ b/src/pipelines/azureml/lightgbm_inferencing.py @@ -197,6 +197,29 @@ def inferencing_task_pipeline_function(benchmark_custom_properties, custom_properties = custom_properties ) inferencing_step.runsettings.configure(target=config.compute.linux_cpu) + + elif variant.framework == "lightgbm_ort_batch": + # call module with all the right arguments + inferencing_step = lightgbm_ort_score_module( + data=data, + model=model, + verbose=False, + run_batch=True, + custom_properties=custom_properties, + ) + inferencing_step.runsettings.configure(target=config.compute.linux_cpu) + + elif variant.framework == "lightgbm_ort_multithread_batch": + # call module with all the right arguments + inferencing_step = lightgbm_ort_score_module( + data=data, + model=model, + verbose=False, + run_parallel=True, + run_batch=True, + custom_properties=custom_properties, + ) + inferencing_step.runsettings.configure(target=config.compute.linux_cpu) else: raise NotImplementedError(f"framework {variant.framework} not implemented (yet)") diff --git a/src/scripts/analysis/analyze.py b/src/scripts/analysis/analyze.py index 3d9e0ffe..fbf2ac1c 100644 --- a/src/scripts/analysis/analyze.py +++ b/src/scripts/analysis/analyze.py @@ -1,5 +1,5 @@ # Copyright (c) Microsoft Corporation. -# Licensed under the MIT license. +# Licensed under the MIT license. """ TreeLite/Python inferencing script @@ -131,9 +131,10 @@ def fetch_benchmark_data(self, experiment_id, filter_string): self.logger.info("Fetching Benchmark Runs") # NOTE: returns a pandas dataframe - self.benchmark_data = mlflow.search_runs( - filter_string=filter_string - ) + self.benchmark_data = mlflow.search_runs(filter_string=filter_string) + self.benchmark_data = self.benchmark_data[ + self.benchmark_data.status == "FINISHED" + ] # extract all model information if present if 'tags.benchmark_model' in self.benchmark_data.columns: @@ -160,6 +161,15 @@ def fetch_benchmark_data(self, experiment_id, filter_string): def report_inferencing(self, output_path): """ Uses fetched or load data to produce a reporting for inferencing tasks. 
""" + + # Drop rows which do not specify the time + self.benchmark_data = self.benchmark_data.dropna( + subset=[ + "metrics.time_inferencing", + "dataset_samples", + ] + ) + # create variant readable id self.benchmark_data['variant_id'] = self.benchmark_data['tags.framework'] + "#" + self.benchmark_data['tags.variant_index'] @@ -190,7 +200,6 @@ def report_inferencing(self, output_path): variant_indices_sorted = [ variant_indices[k] for k in variant_indices_sorted_keys ] variants.columns = ['index', 'framework', 'version', 'build', 'cpu count', 'num threads', 'machine', 'system'] - #variants = variants.transpose() # reduce time_inferencing to predict time per request, in micro seconds self.benchmark_data['avg_predict_time_usecs'] = self.benchmark_data['metrics.time_inferencing'].astype(float) / self.benchmark_data['dataset_samples'].astype(int) * 1000000 @@ -202,6 +211,13 @@ def report_inferencing(self, output_path): + self.benchmark_data['model_columns'] + " cols" ) + # Take last measurement per inferencing task config + self.benchmark_data = ( + self.benchmark_data.sort_values("start_time") + .groupby(["inferencing task config", "variant_id"]) + .last() + ).reset_index() + # pivot metrics table metrics = self.benchmark_data.pivot( index=['inferencing task config'], @@ -216,32 +232,38 @@ def report_inferencing(self, output_path): for variant_id in variant_indices_sorted: percentile_metrics_values = ( - self.benchmark_data.loc[self.benchmark_data['variant_id'] == variant_id][[ - 'inferencing task config', - 'variant_id', - 'metrics.batch_time_inferencing_p50_usecs', - 'metrics.batch_time_inferencing_p90_usecs', - 'metrics.batch_time_inferencing_p99_usecs' - ]] + self.benchmark_data.loc[ + self.benchmark_data["variant_id"] == variant_id + ][ + [ + "inferencing task config", + "variant_id", + "metrics.batch_latency_p50_usecs", + "metrics.batch_latency_p90_usecs", + "metrics.batch_latency_p99_usecs", + ] + ] ).dropna() - + if len(percentile_metrics_values) == 0: continue - percentile_metrics = ( - percentile_metrics_values.pivot( - index=['inferencing task config'], - columns=['variant_id'], - values=['metrics.batch_time_inferencing_p50_usecs', 'metrics.batch_time_inferencing_p90_usecs', 'metrics.batch_time_inferencing_p99_usecs'] - ) + percentile_metrics = percentile_metrics_values.pivot( + index=["inferencing task config"], + columns=["variant_id"], + values=[ + "metrics.batch_latency_p50_usecs", + "metrics.batch_latency_p90_usecs", + "metrics.batch_latency_p99_usecs", + ], ) - percentile_metrics.columns = [ col[0].lstrip("metrics.batch_time_inferencing_") for col in percentile_metrics.columns ] + percentile_metrics.columns = [ + col[0].lstrip("metrics.batch_latency_") + for col in percentile_metrics.columns + ] percentile_metrics_reports.append( - { - 'variant_id' : variant_id, - 'report' : percentile_metrics.to_markdown() - } + {"variant_id": variant_id, "report": percentile_metrics.to_markdown()} ) # load the jinja template from local files @@ -297,10 +319,10 @@ def run(args, unknown_args=[]): experiment_id=args.experiment_id, filter_string=f"tags.task = 'score' and tags.benchmark_name = '{args.benchmark_id}'" ) - + if args.data_save: analysis_engine.save_benchmark_data(args.data_save) - + analysis_engine.report_inferencing(args.output) else: diff --git a/src/scripts/inferencing/lightgbm_ort/score.py b/src/scripts/inferencing/lightgbm_ort/score.py index 5ee427e9..4d7d028f 100644 --- a/src/scripts/inferencing/lightgbm_ort/score.py +++ b/src/scripts/inferencing/lightgbm_ort/score.py @@ 
-31,9 +31,15 @@ class LightGBMONNXRTInferecingScript(RunnableScript): def __init__(self): + framework = "onnx" + if "--run_parallel" in sys.argv: + framework += "_multithreaded" + if "--run_batch" in sys.argv: + framework += "_batch" + super().__init__( task="score", - framework="lightgbm", + framework=framework, framework_version="ONNXRT." + str(ort.__version__), ) @@ -95,7 +101,14 @@ def get_arg_parser(cls, parser=None): required=False, default=False, type=bool, - help="number of threads", + help="allows intra sample parallelism", + ) + group_params.add_argument( + "--run_batch", + required=False, + default=False, + type=bool, + help="runs inference in a single batch", ) group_params.add_argument( "--predict_disable_shape_check", @@ -215,22 +228,41 @@ def run(self, args, logger, metrics_logger, unknown_args): time_inferencing_per_query = [] timeit_loops = 10 - for i in range(len(inference_raw_data)): + + if args.run_batch: + batch_length = len(inference_raw_data) prediction_time = timeit.timeit( - lambda: sessionml.run( - [sessionml.get_outputs()[0].name], - {sessionml.get_inputs()[0].name: inference_raw_data[i : i + 1]}, + lambda: sessionml_batch.run( + [sessionml_batch.get_outputs()[0].name], + {sessionml_batch.get_inputs()[0].name: inference_raw_data}, ), number=timeit_loops, ) prediction_time /= timeit_loops - time_inferencing_per_query.append(prediction_time) - metrics_logger.log_metric("time_inferencing", sum(time_inferencing_per_query)) + metrics_logger.log_metric("time_inferencing", prediction_time) + time_inferencing_per_query = [prediction_time] + else: + batch_length = 1 + for i in range(len(inference_raw_data)): + prediction_time = timeit.timeit( + lambda: sessionml.run( + [sessionml.get_outputs()[0].name], + {sessionml.get_inputs()[0].name: inference_raw_data[i : i + 1]}, + ), + number=timeit_loops, + ) + prediction_time /= timeit_loops + time_inferencing_per_query.append(prediction_time) + metrics_logger.log_metric( + "time_inferencing", sum(time_inferencing_per_query) + ) + + logger.info(f"Batch size: {batch_length}") # use helper to log latency with the right metric names metrics_logger.log_inferencing_latencies( time_inferencing_per_query, # only one big batch - batch_length=len(inference_raw_data), + batch_length=batch_length, factor_to_usecs=1000000.0, # values are in seconds ) diff --git a/src/scripts/inferencing/lightgbm_ort/spec.yaml b/src/scripts/inferencing/lightgbm_ort/spec.yaml index f97e94ce..c35157ab 100644 --- a/src/scripts/inferencing/lightgbm_ort/spec.yaml +++ b/src/scripts/inferencing/lightgbm_ort/spec.yaml @@ -1,6 +1,6 @@ $schema: http://azureml/sdk-2-0/CommandComponent.json name: lightgbm_ort_score -version: 0.0.2 +version: 0.0.3 display_name: "LightGBM Inferencing (ONNX RT)" type: CommandComponent description: "LightGBM inferencing using the ONNX Runtime." 
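A caveat on the boolean inputs wired up in the next hunk: `score.py` parses `--run_parallel` and `--run_batch` with `type=bool`, and `bool("False")` is truthy, so passing `False` through the component command still enables the flag. A standalone sketch of the pitfall and of the `strtobool` pattern the script already uses for `--predict_disable_shape_check` (hypothetical flag names, not part of the patch):

```python
# Hedged sketch: plain argparse semantics, independent of this component.
import argparse
from distutils.util import strtobool

parser = argparse.ArgumentParser()
# type=bool converts the raw string, and any non-empty string is truthy
parser.add_argument("--flag_bool", type=bool, default=False)
# strtobool maps "true/false/yes/no/1/0" onto 1/0 as intended
parser.add_argument("--flag_strtobool", type=strtobool, default=False)

args = parser.parse_args(["--flag_bool", "False", "--flag_strtobool", "False"])
assert args.flag_bool is True      # surprising: the string "False" parsed as True
assert args.flag_strtobool == 0    # behaves as the CLI user expects
```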
@@ -24,6 +24,9 @@ inputs: run_parallel: type: Boolean optional: true + run_batch: + type: Boolean + optional: true verbose: type: Boolean default: False @@ -43,6 +46,7 @@ command: >- --output {outputs.predictions} [--num_threads {inputs.n_threads}] [--run_parallel {inputs.run_parallel}] + [--run_batch {inputs.run_batch}] --predict_disable_shape_check {inputs.predict_disable_shape_check} --verbose {inputs.verbose} [--custom_properties {inputs.custom_properties}] From 8730179e13fefa1461d99461f8b0450adee7aba6 Mon Sep 17 00:00:00 2001 From: David Majercak Date: Wed, 19 Oct 2022 14:54:51 -0700 Subject: [PATCH 07/20] delete file --- .../lightgbm-inferencing-prod.yaml | 51 ------------------- 1 file changed, 51 deletions(-) delete mode 100644 conf/experiments/lightgbm-inferencing-prod.yaml diff --git a/conf/experiments/lightgbm-inferencing-prod.yaml b/conf/experiments/lightgbm-inferencing-prod.yaml deleted file mode 100644 index d2898f2d..00000000 --- a/conf/experiments/lightgbm-inferencing-prod.yaml +++ /dev/null @@ -1,51 +0,0 @@ -# This experiment runs multiple variants of lightgbm inferencing + treelite -# on a given user-defined dataset and model -# -# to execute: -# > python src/pipelines/azureml/lightgbm_inferencing.py --exp-config conf/experiments/lightgbm-inferencing.yaml - -defaults: - - aml: custom_prod - - compute: custom_prod - -### CUSTOM PARAMETERS ### - -experiment: - name: "lightgbm_inferencing_prod" - description: "something interesting to say about this" - -run: - submit: true - -lightgbm_inferencing_config: - # name of your particular benchmark - benchmark_name: "benchmark-prod" # override this with a unique name - - # list all the data/model pairs to run inferencing with - tasks: - - data: - name: "NiR4_OFE_FR_NOFF_DATA" - model: - name: "NiR4_OFE_LGBM" - - data: - name: "NiR4_HRS_FR_NOFF_DATA" - model: - name: "NiR4_HRS_LGBM" - - # list all inferencing frameworks and their builds - variants: - # - framework: lightgbm_python # v3.3.0 via pypi - # - framework: lightgbm_c_api # v3.3.0 with C API prediction - # - framework: lightgbm_c_api # v3.3.0 with C API prediction - # build: docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile - # - framework: lightgbm_c_api # v3.2.1 with C API prediction - # build: docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile - # - framework: lightgbm_c_api # v3.2.1 with C API prediction - # build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile - # - framework: lightgbm_ray # ray implementation - - framework: lightgbm_ort # ONNX RT implementation - - framework: treelite_python # v1.3.0 - - # to use custom_win_cli, you need to compile your own binaries - # see src/scripts/inferencing/custom_win_cli/static_binaries/README.md - #- framework: custom_win_cli From a581cd19293c3030a302390b207c412550e6d628 Mon Sep 17 00:00:00 2001 From: David Majercak Date: Wed, 19 Oct 2022 14:59:23 -0700 Subject: [PATCH 08/20] update configs --- conf/experiments/benchmarks/lightgbm-inferencing.yaml | 6 +++--- conf/experiments/lightgbm-inferencing.yaml | 3 +++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/conf/experiments/benchmarks/lightgbm-inferencing.yaml b/conf/experiments/benchmarks/lightgbm-inferencing.yaml index 262964d8..17bca896 100644 --- a/conf/experiments/benchmarks/lightgbm-inferencing.yaml +++ b/conf/experiments/benchmarks/lightgbm-inferencing.yaml @@ -82,11 +82,11 @@ lightgbm_inferencing_config: build: docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile - framework: lightgbm_c_api # v3.2.1 with C API prediction build: 
docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile - # - framework: lightgbm_ray # ray implementation + - framework: lightgbm_ray # ray implementation - framework: lightgbm_ort # ONNX RT implementation - - framework: lightgbm_ort_batch # ONNX RT implementation + - framework: lightgbm_ort_batch # ONNX RT single batch implementation - framework: lightgbm_ort_multithread # ONNX RT multithreaded implementation - - framework: lightgbm_ort_multithread_batch # ONNX RT multithreaded implementation + - framework: lightgbm_ort_multithread_batch # ONNX RT multithreaded single batch implementation - framework: treelite_python # v1.3.0 # to use custom_win_cli, you need to compile your own binaries diff --git a/conf/experiments/lightgbm-inferencing.yaml b/conf/experiments/lightgbm-inferencing.yaml index f70d2d22..bc5a667c 100644 --- a/conf/experiments/lightgbm-inferencing.yaml +++ b/conf/experiments/lightgbm-inferencing.yaml @@ -40,6 +40,9 @@ lightgbm_inferencing_config: build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile - framework: lightgbm_ray # ray implementation - framework: lightgbm_ort # ONNX RT implementation + - framework: lightgbm_ort_batch # ONNX RT single batch implementation + - framework: lightgbm_ort_multithread # ONNX RT multithreaded implementation + - framework: lightgbm_ort_multithread_batch # ONNX RT multithreaded single batch implementation - framework: treelite_python # v1.3.0 # to use custom_win_cli, you need to compile your own binaries From 8ed2d05fdfacf17477706e1001980d3fb0e78bb5 Mon Sep 17 00:00:00 2001 From: David Majercak Date: Wed, 19 Oct 2022 15:03:07 -0700 Subject: [PATCH 09/20] use timeit instead of monotonic --- src/scripts/inferencing/lightgbm_python/score.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/scripts/inferencing/lightgbm_python/score.py b/src/scripts/inferencing/lightgbm_python/score.py index 94505f5c..257e6603 100644 --- a/src/scripts/inferencing/lightgbm_python/score.py +++ b/src/scripts/inferencing/lightgbm_python/score.py @@ -8,7 +8,7 @@ import sys import argparse import logging -import time +import timeit import numpy as np from distutils.util import strtobool import lightgbm @@ -105,13 +105,21 @@ def run(self, args, logger, metrics_logger, unknown_args): ) logger.info(f"Running .predict()") - batch_start_time = time.monotonic() predictions_array = booster.predict( data=inference_raw_data, num_threads=args.num_threads, - predict_disable_shape_check=bool(args.predict_disable_shape_check) + predict_disable_shape_check=bool(args.predict_disable_shape_check), ) - prediction_time = (time.monotonic() - batch_start_time) + timeit_loops = 10 + prediction_time = timeit.timeit( + lambda: booster.predict( + data=inference_raw_data, + num_threads=args.num_threads, + predict_disable_shape_check=bool(args.predict_disable_shape_check), + ), + number=timeit_loops, + ) + prediction_time /= timeit_loops metrics_logger.log_metric("time_inferencing", prediction_time) # use helper to log latency with the right metric names From 06a12d54d428751bbed4abc3b5dcce49d751d883 Mon Sep 17 00:00:00 2001 From: David Majercak Date: Wed, 19 Oct 2022 15:31:29 -0700 Subject: [PATCH 10/20] use timeit instead of monotonic --- .../inferencing/lightgbm_python/spec.yaml | 2 +- src/scripts/inferencing/lightgbm_ray/score.py | 16 +++++++++++++--- src/scripts/inferencing/lightgbm_ray/spec.yaml | 2 +- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/scripts/inferencing/lightgbm_python/spec.yaml 
b/src/scripts/inferencing/lightgbm_python/spec.yaml index 7fcbebca..05c30f24 100644 --- a/src/scripts/inferencing/lightgbm_python/spec.yaml +++ b/src/scripts/inferencing/lightgbm_python/spec.yaml @@ -1,6 +1,6 @@ $schema: http://azureml/sdk-2-0/CommandComponent.json name: lightgbm_python_score -version: 1.0.1 +version: 1.0.2 display_name: "LightGBM Inferencing (Python API)" type: CommandComponent description: "LightGBM inferencing using the Python API." diff --git a/src/scripts/inferencing/lightgbm_ray/score.py b/src/scripts/inferencing/lightgbm_ray/score.py index 270efc5d..67561c68 100644 --- a/src/scripts/inferencing/lightgbm_ray/score.py +++ b/src/scripts/inferencing/lightgbm_ray/score.py @@ -8,7 +8,7 @@ import sys import argparse import logging -import time +import timeit import numpy as np from distutils.util import strtobool @@ -99,13 +99,23 @@ def run(self, args, logger, metrics_logger, unknown_args): ) logger.info(f"Running .predict()") - batch_start_time = time.monotonic() + predictions_array = lightgbm_ray.predict( booster, inference_data, ray_params=lightgbm_ray.RayParams(num_actors=args.num_threads) ) - prediction_time = (time.monotonic() - batch_start_time) + + timeit_loops = 10 + prediction_time = timeit.timeit( + lambda: lightgbm_ray.predict( + booster, + inference_data, + ray_params=lightgbm_ray.RayParams(num_actors=args.num_threads) + ), + number=timeit_loops, + ) + prediction_time /= timeit_loops metrics_logger.log_metric("time_inferencing", prediction_time) # use helper to log latency with the right metric names diff --git a/src/scripts/inferencing/lightgbm_ray/spec.yaml b/src/scripts/inferencing/lightgbm_ray/spec.yaml index d94c7ce3..482632e5 100644 --- a/src/scripts/inferencing/lightgbm_ray/spec.yaml +++ b/src/scripts/inferencing/lightgbm_ray/spec.yaml @@ -1,6 +1,6 @@ $schema: http://azureml/sdk-2-0/CommandComponent.json name: lightgbm_ray_score -version: 1.0.1 +version: 1.0.2 display_name: "LightGBM Inferencing (Ray)" type: CommandComponent description: "LightGBM inferencing using the Ray Python API." From 9dab9c0bd675f33c010f0f30bc41a39593d28a29 Mon Sep 17 00:00:00 2001 From: David Majercak Date: Fri, 21 Oct 2022 09:43:07 -0700 Subject: [PATCH 11/20] remove run submit true --- conf/experiments/benchmarks/lightgbm-inferencing.yaml | 3 --- conf/experiments/lightgbm-inferencing.yaml | 3 --- 2 files changed, 6 deletions(-) diff --git a/conf/experiments/benchmarks/lightgbm-inferencing.yaml b/conf/experiments/benchmarks/lightgbm-inferencing.yaml index 17bca896..066b1974 100644 --- a/conf/experiments/benchmarks/lightgbm-inferencing.yaml +++ b/conf/experiments/benchmarks/lightgbm-inferencing.yaml @@ -14,9 +14,6 @@ experiment: name: "lightgbm_inferencing_dev" description: "something interesting to say about this" -run: - submit: true - lightgbm_inferencing_config: # name of your particular benchmark benchmark_name: "benchmark-inferencing-20211216.1" # need to be provided at runtime! 
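
Patches 09 and 10 above swap a single time.monotonic() delta for an averaged timeit loop, which damps scheduler and cache noise in the reported latency. The shared idiom condenses to the sketch below, where predict_once is a hypothetical zero-argument wrapper around the framework-specific predict call:

    import timeit

    timeit_loops = 10
    # total wall time over N repeated calls, divided by N: mean seconds per call
    prediction_time = timeit.timeit(predict_once, number=timeit_loops) / timeit_loops

Note that both scripts still issue one untimed predict first, so the timed loop measures warm latency and the resulting predictions are kept for the output step.
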
diff --git a/conf/experiments/lightgbm-inferencing.yaml b/conf/experiments/lightgbm-inferencing.yaml index bc5a667c..940d3ac2 100644 --- a/conf/experiments/lightgbm-inferencing.yaml +++ b/conf/experiments/lightgbm-inferencing.yaml @@ -14,9 +14,6 @@ experiment: name: "lightgbm_inferencing_dev" description: "something interesting to say about this" -run: - submit: true - lightgbm_inferencing_config: # name of your particular benchmark benchmark_name: "benchmark-dev" # override this with a unique name From 30033be54f8b675f23834270dc7fff75591e3425 Mon Sep 17 00:00:00 2001 From: David Majercak Date: Fri, 21 Oct 2022 10:02:20 -0700 Subject: [PATCH 12/20] correct package versions --- requirements.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index e0d7e00c..79c0ac6d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,8 +21,8 @@ optuna==3.0.3 # pipelines shrike[pipeline]==1.31.10 -azure-core==1.26.0 -azure-storage-blob==12.13.0 +azure-core==1.20.1 +azure-storage-blob==12.11.0 azure-ml-component==0.9.13.post1 azureml-train-core==1.37.0 azureml-dataset-runtime==1.46.0 From be708b1ca663be9f2753e59c2b8e0dcc5a2a3855 Mon Sep 17 00:00:00 2001 From: David Majercak Date: Wed, 2 Nov 2022 11:25:33 -0700 Subject: [PATCH 13/20] update onnx benchmarking --- src/common/tasks.py | 3 ++ src/pipelines/azureml/lightgbm_inferencing.py | 41 ++++--------------- src/scripts/inferencing/lightgbm_ort/score.py | 19 +++++---- .../inferencing/lightgbm_ort/spec.yaml | 2 +- 4 files changed, 23 insertions(+), 42 deletions(-) diff --git a/src/common/tasks.py b/src/common/tasks.py index 95504774..df3d1b25 100644 --- a/src/common/tasks.py +++ b/src/common/tasks.py @@ -26,6 +26,9 @@ class inferencing_task: class inferencing_variants: framework: str = MISSING build: Optional[str] = None + threads: Optional[int] = 1 + batch_exec: Optional[bool] = False + parallel_exec: Optional[bool] = False @dataclass class data_generation_task: diff --git a/src/pipelines/azureml/lightgbm_inferencing.py b/src/pipelines/azureml/lightgbm_inferencing.py index 9da7934d..f272180d 100644 --- a/src/pipelines/azureml/lightgbm_inferencing.py +++ b/src/pipelines/azureml/lightgbm_inferencing.py @@ -183,43 +183,18 @@ def inferencing_task_pipeline_function(benchmark_custom_properties, data = data, model = model, verbose = False, + run_parallel = variant.parallel_exec, + run_batch = variant.batch_exec, + n_threads = variant.threads, custom_properties = custom_properties ) inferencing_step.runsettings.configure(target=config.compute.linux_cpu) - elif variant.framework == "lightgbm_ort_multithread": - # call module with all the right arguments - inferencing_step = lightgbm_ort_score_module( - data = data, - model = model, - verbose = False, - run_parallel = True, - custom_properties = custom_properties - ) - inferencing_step.runsettings.configure(target=config.compute.linux_cpu) - - elif variant.framework == "lightgbm_ort_batch": - # call module with all the right arguments - inferencing_step = lightgbm_ort_score_module( - data=data, - model=model, - verbose=False, - run_batch=True, - custom_properties=custom_properties, - ) - inferencing_step.runsettings.configure(target=config.compute.linux_cpu) - - elif variant.framework == "lightgbm_ort_multithread_batch": - # call module with all the right arguments - inferencing_step = lightgbm_ort_score_module( - data=data, - model=model, - verbose=False, - run_parallel=True, - run_batch=True, - custom_properties=custom_properties, - ) - 
inferencing_step.runsettings.configure(target=config.compute.linux_cpu) + if variant.parallel_exec: + variant_comment.append(f"parallel execution") + if variant.batch_exec: + variant_comment.append(f"batch execution") + variant_comment.append(f"num threads {variant.threads}") else: raise NotImplementedError(f"framework {variant.framework} not implemented (yet)") diff --git a/src/scripts/inferencing/lightgbm_ort/score.py b/src/scripts/inferencing/lightgbm_ort/score.py index 4d7d028f..b5ee24be 100644 --- a/src/scripts/inferencing/lightgbm_ort/score.py +++ b/src/scripts/inferencing/lightgbm_ort/score.py @@ -32,10 +32,12 @@ class LightGBMONNXRTInferecingScript(RunnableScript): def __init__(self): framework = "onnx" - if "--run_parallel" in sys.argv: - framework += "_multithreaded" - if "--run_batch" in sys.argv: + if "--run_parallel" in sys.argv and strtobool(sys.argv[sys.argv.index("--run_parallel") + 1]): + framework += "_parallel" + if "--run_batch" in sys.argv and strtobool(sys.argv[sys.argv.index("--run_batch") + 1]): framework += "_batch" + if "--num_threads" in sys.argv: + framework += f"_threads_{sys.argv[sys.argv.index('--num_threads') + 1]}" super().__init__( task="score", @@ -99,21 +101,21 @@ def get_arg_parser(cls, parser=None): group_params.add_argument( "--run_parallel", required=False, - default=False, - type=bool, + default="False", + type=strtobool, help="allows intra sample parallelism", ) group_params.add_argument( "--run_batch", required=False, - default=False, - type=bool, + default="False", + type=strtobool, help="runs inference in a single batch", ) group_params.add_argument( "--predict_disable_shape_check", required=False, - default=False, + default="False", type=strtobool, help="See LightGBM documentation", ) @@ -188,6 +190,7 @@ def run(self, args, logger, metrics_logger, unknown_args): if args.num_threads > 0: logger.info(f"Setting number of threads to {args.num_threads}") sess_options.intra_op_num_threads = args.num_threads + sess_options.inter_op_num_threads = args.num_threads if args.run_parallel: logger.info(f"Creating multithreaded inference session") diff --git a/src/scripts/inferencing/lightgbm_ort/spec.yaml b/src/scripts/inferencing/lightgbm_ort/spec.yaml index c35157ab..b4fc6530 100644 --- a/src/scripts/inferencing/lightgbm_ort/spec.yaml +++ b/src/scripts/inferencing/lightgbm_ort/spec.yaml @@ -1,6 +1,6 @@ $schema: http://azureml/sdk-2-0/CommandComponent.json name: lightgbm_ort_score -version: 0.0.3 +version: 0.0.6 display_name: "LightGBM Inferencing (ONNX RT)" type: CommandComponent description: "LightGBM inferencing using the ONNX Runtime." From 2a9ef5f0fbb8b315170e11064f91674c14d4bb0f Mon Sep 17 00:00:00 2001 From: David Majercak Date: Mon, 14 Nov 2022 16:28:31 -0800 Subject: [PATCH 14/20] small updates --- Exploration.ipynb | 551 ++++++++++++++++++ .../inferencing/lightgbm_ort/spec.yaml | 4 +- 2 files changed, 553 insertions(+), 2 deletions(-) create mode 100644 Exploration.ipynb diff --git a/Exploration.ipynb b/Exploration.ipynb new file mode 100644 index 00000000..48b06e64 --- /dev/null +++ b/Exploration.ipynb @@ -0,0 +1,551 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import lightgbm\n", + "import timeit" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...4854485548564857485848594860486148624863
0129612.0153000.06000.00.00.0118470648.00.0628.06000.0...0.0132.0140.0137.0135.00.00.00.00.00.0
11200.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
21200.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
3760.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
41200.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\n", + "

5 rows × 4864 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 \\\n", + "0 129 612.0 153000.0 6000.0 0.0 0.0 118470648.0 0.0 628.0 \n", + "1 120 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "2 120 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "3 76 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "4 120 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "\n", + " 9 ... 4854 4855 4856 4857 4858 4859 4860 4861 4862 4863 \n", + "0 6000.0 ... 0.0 132.0 140.0 137.0 135.0 0.0 0.0 0.0 0.0 0.0 \n", + "1 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "2 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "3 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "4 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "\n", + "[5 rows x 4864 columns]" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "model_path = './NiR4_OFE_LGBM_model.txt'\n", + "data_path = './File_0-csv.txt'\n", + "data = pd.read_csv(data_path, header=None)\n", + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[LightGBM] [Info] Construct bin mappers from text data time 2.14 seconds\n" + ] + } + ], + "source": [ + "booster = lightgbm.Booster(model_file=model_path)\n", + "\n", + "inference_data = lightgbm.Dataset(data_path, free_raw_data=False).construct()\n", + "inference_raw_data = inference_data.get_data()" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 1.41436096, -0.27520206, -0.32896408, ..., 0.27021392,\n", + " 0.06719871, 2.11317219])" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predictions_array_lgbmpython = booster.predict(\n", + " data=inference_raw_data,\n", + " num_threads=1,\n", + " predict_disable_shape_check=True,\n", + ")\n", + "predictions_array_lgbmpython" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[13:57:51] D:\\a\\1\\s\\src\\compiler\\ast_native.cc:711: Using ASTNativeCompiler\n", + "[13:57:51] D:\\a\\1\\s\\src\\compiler\\ast\\split.cc:29: Parallel compilation enabled; member trees will be divided into 16 translation units.\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:92: Code generation finished. 
Writing code to files...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu2.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file main.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file header.h...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu5.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu0.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu1.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu3.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu4.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu6.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu7.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu8.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu9.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu10.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu11.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu12.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu13.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu14.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file tu15.c...\n", + "[13:57:55] D:\\a\\1\\s\\src\\c_api\\c_api.cc:97: Writing file recipe.json...\n", + "[13:57:55] c:\\Users\\damajercak\\.conda\\envs\\lightgbmbenchmark\\lib\\site-packages\\treelite\\contrib\\util.py:105: Compiling sources files in directory .\\tmpa3c08ggs into object files (*.obj)...\n", + "[13:58:21] c:\\Users\\damajercak\\.conda\\envs\\lightgbmbenchmark\\lib\\site-packages\\treelite\\contrib\\util.py:135: Generating dynamic shared library .\\tmpa3c08ggs\\predictor.dll...\n", + "[13:58:23] c:\\Users\\damajercak\\.conda\\envs\\lightgbmbenchmark\\lib\\site-packages\\treelite\\contrib\\__init__.py:282: Generated shared library in 28.49 seconds\n" + ] + } + ], + "source": [ + "import treelite, treelite_runtime\n", + "\n", + "model = treelite.Model.load(\n", + " model_path,\n", + " model_format=\"lightgbm\"\n", + ")\n", + "model.export_lib(\n", + " toolchain=\"msvc\",\n", + " libpath=model_path + \".so\",\n", + " verbose=True,\n", + " params={'parallel_comp':16}\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[13:59:32] c:\\Users\\damajercak\\.conda\\envs\\lightgbmbenchmark\\lib\\site-packages\\treelite_runtime\\predictor.py:159: Dynamic shared library c:\\Projects\\lightgbm-benchmark\\NiR4_OFE_LGBM_model.txt.so has been successfully loaded into memory\n" + ] + } + ], + "source": [ + "predictor = treelite_runtime.Predictor(\n", + " model_path + '.so',\n", + " verbose=True,\n", + " nthread=1\n", + ")\n", + "dmat = treelite_runtime.DMatrix(data.to_numpy())" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [], + "source": [ + "predictions_array_treelite =predictor.predict(dmat)" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "The maximum opset needed by this model is only 8.\n", + "The maximum opset needed by this model is only 8.\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + 
"[LightGBM] [Info] Construct bin mappers from text data time 2.43 seconds\n" + ] + } + ], + "source": [ + "from onnxconverter_common.data_types import FloatTensorType\n", + "from onnxmltools.convert import convert_lightgbm\n", + "import onnxruntime as ort\n", + "import numpy as np\n", + "\n", + "with open(model_path, \"r\") as mf:\n", + " model_str = mf.read()\n", + " model_str = model_str.replace(\n", + " \"objective=lambdarank\", \"objective=regression\"\n", + " )\n", + "booster_ort = lightgbm.Booster(model_str=model_str)\n", + "\n", + "onnx_input_types = [\n", + " (\n", + " \"input\",\n", + " FloatTensorType(\n", + " [1, inference_data.num_feature()]\n", + " ),\n", + " )\n", + "]\n", + "onnx_input_batch_types = [\n", + " (\n", + " \"input\",\n", + " FloatTensorType(\n", + " [inference_data.num_data(), inference_data.num_feature()]\n", + " ),\n", + " )\n", + "]\n", + "onnx_ml_model = convert_lightgbm(booster_ort, initial_types=onnx_input_types)\n", + "onnx_ml_batch_model = convert_lightgbm(booster_ort, initial_types=onnx_input_batch_types)\n", + "\n", + "sess_options = ort.SessionOptions()\n", + "sess_options.intra_op_num_threads = 0\n", + "sess_options.inter_op_num_threads = 0\n", + "\n", + "sess_options.execution_mode = (\n", + " ort.ExecutionMode.ORT_SEQUENTIAL\n", + ")\n", + "sess_options.graph_optimization_level = (\n", + " ort.GraphOptimizationLevel.ORT_ENABLE_ALL\n", + ")\n", + "sessionml = ort.InferenceSession(\n", + " onnx_ml_model.SerializeToString(), sess_options\n", + ")\n", + "sessionml_batch = ort.InferenceSession(\n", + " onnx_ml_batch_model.SerializeToString(), sess_options\n", + ")\n", + "inference_data = lightgbm.Dataset(\n", + " data_path, free_raw_data=False\n", + ").construct()\n", + "inference_raw_data = inference_data.get_data()\n", + "if type(inference_raw_data) == str:\n", + " inference_raw_data = np.loadtxt(\n", + " inference_raw_data, delimiter=\",\"\n", + " ).astype(np.float32)[:, : inference_data.num_feature()]" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": {}, + "outputs": [], + "source": [ + "predictions_array_ort = sessionml_batch.run(\n", + " [sessionml.get_outputs()[0].name],\n", + " {sessionml.get_inputs()[0].name: inference_raw_data},\n", + ")[0][:, 0]" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.7666397998491448" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "time_inferencing_per_query = []\n", + "for i in range(len(inference_raw_data)):\n", + " prediction_time = timeit.timeit(\n", + " lambda: sessionml.run(\n", + " [sessionml.get_outputs()[0].name],\n", + " {sessionml.get_inputs()[0].name: inference_raw_data[i : i + 1]},\n", + " ),\n", + " number=1,\n", + " )\n", + " time_inferencing_per_query.append(prediction_time/1)\n", + "sum(time_inferencing_per_query)" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.400464499998634" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "timeit.timeit(lambda: sessionml_batch.run(\n", + " [sessionml.get_outputs()[0].name],\n", + " {sessionml.get_inputs()[0].name: inference_raw_data},\n", + "), number=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[ 1.41436096 -0.27520206 
-0.32896408 -0.08315643 -0.26660063 -0.30202675\n", + " -0.22120572 -0.35424621 -0.25634644 -0.06725079]\n", + "[ 1.7355299 0.2582493 0.28444618 0.51784474 0.49668223 -0.04218447\n", + " 0.12811233 0.20044815 -0.10399695 0.61548153]\n", + "[ 1.7355288 0.25824943 0.28444648 0.5178444 0.49668252 -0.04218467\n", + " 0.12811226 0.20044814 -0.10399713 0.6154818 ]\n" + ] + } + ], + "source": [ + "print(predictions_array_lgbmpython[:10])\n", + "print(predictions_array_treelite[:10])\n", + "print(predictions_array_ort[:10])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.8.13 ('lightgbmbenchmark')", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "218deddc5dc66f2d9cab81f1bf3043b58bb8ede28fae2157142347a8a27e0fa5" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/src/scripts/inferencing/lightgbm_ort/spec.yaml b/src/scripts/inferencing/lightgbm_ort/spec.yaml index b4fc6530..eb317acc 100644 --- a/src/scripts/inferencing/lightgbm_ort/spec.yaml +++ b/src/scripts/inferencing/lightgbm_ort/spec.yaml @@ -1,10 +1,10 @@ $schema: http://azureml/sdk-2-0/CommandComponent.json name: lightgbm_ort_score -version: 0.0.6 +version: 0.0.7 display_name: "LightGBM Inferencing (ONNX RT)" type: CommandComponent description: "LightGBM inferencing using the ONNX Runtime." -is_deterministic: true +is_deterministic: false inputs: data: type: AnyDirectory From 0136a526532813eda382ca2d9b51e68fa7598b8c Mon Sep 17 00:00:00 2001 From: David Majercak Date: Mon, 12 Dec 2022 09:29:09 -0800 Subject: [PATCH 15/20] update ort inferencing --- src/pipelines/azureml/lightgbm_inferencing.py | 29 ++++++++++--------- .../lightgbm_ort/default.dockerfile | 8 ++--- .../inferencing/lightgbm_ort/spec.yaml | 5 +++- 3 files changed, 23 insertions(+), 19 deletions(-) diff --git a/src/pipelines/azureml/lightgbm_inferencing.py b/src/pipelines/azureml/lightgbm_inferencing.py index f272180d..21bbef06 100644 --- a/src/pipelines/azureml/lightgbm_inferencing.py +++ b/src/pipelines/azureml/lightgbm_inferencing.py @@ -8,7 +8,7 @@ > python src/pipelines/azureml/lightgbm_inferencing.py --exp-config conf/experiments/lightgbm-inferencing.yaml """ # pylint: disable=no-member -# NOTE: because it raises 'dict' has no 'outputs' member in dsl.pipeline construction +# NOTE: because it raises "dict" has no "outputs" member in dsl.pipeline construction import os import sys import json @@ -25,8 +25,8 @@ from azure.ml.component.environment import Docker # when running this script directly, needed to import common -LIGHTGBM_REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..', '..')) -SCRIPTS_SOURCES_ROOT = os.path.join(LIGHTGBM_REPO_ROOT, 'src') +LIGHTGBM_REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "..")) +SCRIPTS_SOURCES_ROOT = os.path.join(LIGHTGBM_REPO_ROOT, "src") if SCRIPTS_SOURCES_ROOT not in sys.path: logging.info(f"Adding {SCRIPTS_SOURCES_ROOT} to path") @@ -82,7 +82,7 @@ class lightgbm_inferencing_config: # pylint: disable=invalid-name @dsl.pipeline(name=f"lightgbm_inferencing", # pythonic name description=f"LightGBM inferencing on user 
defined dataset/model", - non_pipeline_parameters=['benchmark_custom_properties', 'config']) + non_pipeline_parameters=["benchmark_custom_properties", "config"]) def inferencing_task_pipeline_function(benchmark_custom_properties, config, data, @@ -107,9 +107,9 @@ def inferencing_task_pipeline_function(benchmark_custom_properties, custom_properties = benchmark_custom_properties.copy() custom_properties.update({ # adding build settings (docker) - 'framework_build' : variant.build or "default", + "framework_build" : variant.build or "default", # adding variant_index to spot which variant is the reference - 'variant_index' : variant_index + "variant_index" : variant_index }) # passing as json string that each module parses to digest as tags/properties custom_properties = json.dumps(custom_properties) @@ -152,7 +152,7 @@ def inferencing_task_pipeline_function(benchmark_custom_properties, data = data, model = model, verbose = False, - custom_properties = custom_properties.replace("\"","\\\"") + custom_properties = custom_properties.replace("\"", "\\\"") ) inferencing_step.runsettings.configure(target=config.compute.windows_cpu) @@ -173,7 +173,8 @@ def inferencing_task_pipeline_function(benchmark_custom_properties, data = data, model = model, verbose = False, - custom_properties = custom_properties + custom_properties = custom_properties, + predict_disable_shape_check = predict_disable_shape_check, ) inferencing_step.runsettings.configure(target=config.compute.linux_cpu) @@ -186,7 +187,7 @@ def inferencing_task_pipeline_function(benchmark_custom_properties, data = data, model = model, verbose = False, run_parallel = variant.parallel_exec, run_batch = variant.batch_exec, n_threads = variant.threads, - custom_properties = custom_properties + custom_properties = custom_properties.replace("\"", "\\\"") ) inferencing_step.runsettings.configure(target=config.compute.linux_cpu) @@ -216,7 +217,7 @@ def inferencing_task_pipeline_function(benchmark_custom_properties, # provide step readable display name inferencing_step.node_name = format_run_name(f"inferencing_{variant.framework}_{variant_index}") - # return {key: output}' + # return {key: output} return pipeline_outputs @@ -225,7 +226,7 @@ def inferencing_task_pipeline_function(benchmark_custom_properties, non_pipeline_parameters=["workspace", "config"] # required to use config object ) def inferencing_all_tasks(workspace, config): """Pipeline's main building function.
Args: workspace (azureml.core.Workspace): the AzureML workspace @@ -242,9 +243,9 @@ def inferencing_all_tasks(workspace, config): # create custom properties for this task benchmark_custom_properties = { - 'benchmark_name' : config.lightgbm_inferencing_config.benchmark_name, - 'benchmark_dataset' : inferencing_task.data.name, - 'benchmark_model' : inferencing_task.model.name, + "benchmark_name" : config.lightgbm_inferencing_config.benchmark_name, + "benchmark_dataset" : inferencing_task.data.name, + "benchmark_model" : inferencing_task.model.name, } inferencing_task_subgraph_step = inferencing_task_pipeline_function( diff --git a/src/scripts/inferencing/lightgbm_ort/default.dockerfile b/src/scripts/inferencing/lightgbm_ort/default.dockerfile index 01e5614c..f76ac583 100644 --- a/src/scripts/inferencing/lightgbm_ort/default.dockerfile +++ b/src/scripts/inferencing/lightgbm_ort/default.dockerfile @@ -7,16 +7,16 @@ ARG lightgbm_version="3.3.0" # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH # Install pip dependencies RUN HOROVOD_WITH_TENSORFLOW=1 \ - pip install 'pandas>=1.1,<1.2' \ - 'numpy>=1.10,<1.20' \ - 'matplotlib==3.4.3' \ + pip install 'pandas==1.5.2' \ + 'numpy==1.23.5' \ + 'matplotlib==3.6.2' \ 'scipy~=1.5.0' \ 'scikit-learn~=0.24.1' \ 'azureml-core==1.35.0' \ diff --git a/src/scripts/inferencing/lightgbm_ort/spec.yaml b/src/scripts/inferencing/lightgbm_ort/spec.yaml index eb317acc..d4288abd 100644 --- a/src/scripts/inferencing/lightgbm_ort/spec.yaml +++ b/src/scripts/inferencing/lightgbm_ort/spec.yaml @@ -40,6 +40,9 @@ outputs: type: AnyDirectory command: >- + apt update -y && + apt install numactl -y && + numactl -m 0 -N 0 -- python score.py --data {inputs.data} --model {inputs.model} @@ -49,7 +52,7 @@ command: >- [--run_batch {inputs.run_batch}] --predict_disable_shape_check {inputs.predict_disable_shape_check} --verbose {inputs.verbose} - [--custom_properties {inputs.custom_properties}] + [--custom_properties "{inputs.custom_properties}"] --cluster_auto_setup True environment: From 6a740f19b428425d51707905ca0a5d8ab7269fbb Mon Sep 17 00:00:00 2001 From: David Majercak Date: Mon, 12 Dec 2022 09:34:11 -0800 Subject: [PATCH 16/20] pass predict_disable_shape_check for ray --- src/scripts/inferencing/lightgbm_ray/score.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/scripts/inferencing/lightgbm_ray/score.py b/src/scripts/inferencing/lightgbm_ray/score.py index 67561c68..105725ef 100644 --- a/src/scripts/inferencing/lightgbm_ray/score.py +++ b/src/scripts/inferencing/lightgbm_ray/score.py @@ -103,7 +103,8 @@ def run(self, args, logger, metrics_logger, unknown_args): predictions_array = lightgbm_ray.predict( booster, inference_data, - ray_params=lightgbm_ray.RayParams(num_actors=args.num_threads) + ray_params=lightgbm_ray.RayParams(num_actors=args.num_threads), + predict_disable_shape_check=bool(args.predict_disable_shape_check), ) timeit_loops = 10 @@ -111,7 +112,8 @@ def run(self, args, logger, metrics_logger, unknown_args): lambda: lightgbm_ray.predict( booster, inference_data, - ray_params=lightgbm_ray.RayParams(num_actors=args.num_threads) + ray_params=lightgbm_ray.RayParams(num_actors=args.num_threads), + predict_disable_shape_check=bool(args.predict_disable_shape_check), ), number=timeit_loops, ) From c281d275315ff4a43105986e2cfc2264ef489c8d Mon Sep 17 00:00:00 2001 From: David 
Majercak Date: Mon, 12 Dec 2022 09:39:37 -0800 Subject: [PATCH 17/20] update docker images and pip --- .github/workflows/azureml_pipelines.yml | 2 +- .github/workflows/benchmark_scripts.yml | 2 +- .github/workflows/docs.yml | 2 +- docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile | 4 ++-- docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile | 4 ++-- docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile | 4 ++-- docker/lightgbm-v3.2.1/linux_cpu_mpi_pip.dockerfile | 4 ++-- docker/lightgbm-v3.2.1/linux_cuda_build.dockerfile | 4 ++-- docker/lightgbm-v3.2.1/linux_gpu_build.dockerfile | 4 ++-- docker/lightgbm-v3.2.1/linux_gpu_pip.dockerfile | 4 ++-- docker/lightgbm-v3.2.1/windows_cpu_pip.dockerfile | 2 +- docker/lightgbm-v3.3.0/linux_cpu_mpi_build.dockerfile | 4 ++-- docker/lightgbm-v3.3.0/linux_cpu_mpi_pip.dockerfile | 2 +- docker/lightgbm-v3.3.0/windows_cpu_pip.dockerfile | 2 +- src/scripts/data_processing/generate_data/conda_env.yaml | 2 +- .../data_processing/lightgbm_data2bin/conda_env.yml | 2 +- src/scripts/data_processing/partition_data/conda_env.yml | 2 +- src/scripts/inferencing/custom_win_cli/conda_env.yaml | 2 +- src/scripts/inferencing/lightgbm_c_api/default.dockerfile | 4 ++-- src/scripts/inferencing/lightgbm_python/default.dockerfile | 4 ++-- src/scripts/inferencing/lightgbm_ray/default.dockerfile | 7 ++++--- src/scripts/inferencing/treelite_python/conda_env.yaml | 2 +- .../model_transformation/treelite_compile/conda_env.yaml | 2 +- src/scripts/sample/conda_env.yaml | 2 +- src/scripts/training/lightgbm_python/default.dockerfile | 2 +- src/scripts/training/lightgbm_ray/default.dockerfile | 2 +- src/scripts/training/ray_tune/default.dockerfile | 2 +- .../training/ray_tune_distributed/default.dockerfile | 2 +- 28 files changed, 41 insertions(+), 40 deletions(-) diff --git a/.github/workflows/azureml_pipelines.yml b/.github/workflows/azureml_pipelines.yml index 2efa112e..364b4cc0 100644 --- a/.github/workflows/azureml_pipelines.yml +++ b/.github/workflows/azureml_pipelines.yml @@ -34,7 +34,7 @@ jobs: - name: Install dependencies run: | - python -m pip install --upgrade pip==21.3.1 + python -m pip install --upgrade pip==22.2.2 pip install flake8==3.9.1 pytest~=6.2 pytest-cov~=2.11 sudo apt-get install libopenmpi-dev if [ -f requirements.txt ]; then pip install -r requirements.txt; fi diff --git a/.github/workflows/benchmark_scripts.yml b/.github/workflows/benchmark_scripts.yml index 35b88423..75f2e1f6 100644 --- a/.github/workflows/benchmark_scripts.yml +++ b/.github/workflows/benchmark_scripts.yml @@ -42,7 +42,7 @@ jobs: - name: Install dependencies run: | sudo apt-get install libopenmpi-dev - python -m pip install --upgrade pip==21.3.1 + python -m pip install --upgrade pip==22.2.2 pip install flake8==3.9.1 pytest~=6.2 pytest-cov~=2.11 if [ -f requirements.txt ]; then pip install -r requirements.txt; fi # hotfix for azurecli issue diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 2ce8b02d..9ed1a320 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -32,7 +32,7 @@ jobs: - name: pip install run: | - python -m pip install --upgrade pip==21.3.1 + python -m pip install --upgrade pip==22.2.2 python -m pip install markdown-include==0.7.0 mkdocstrings==0.19.0 mkdocstrings-python==0.7.1 mkdocs-material==8.4.2 livereload==2.6.3 diff --git a/docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile b/docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile index d8b46623..7f3748d3 100644 --- 
a/docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile +++ b/docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile @@ -1,4 +1,4 @@ -FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210615.v1 +FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221129.v1 LABEL lightgbmbenchmark.linux.cpu.mpi.build.version="3.2.1-patch/20211109.1" # Those arguments will NOT be used by AzureML @@ -54,7 +54,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile b/docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile index 7041380f..a7233019 100644 --- a/docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile +++ b/docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile @@ -1,4 +1,4 @@ -FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210615.v1 +FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221129.v1 LABEL lightgbmbenchmark.linux.cpu.mpi.build.version="3.3.0-patch/20211109.1" # Those arguments will NOT be used by AzureML @@ -54,7 +54,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile b/docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile index 67eb7d88..6c05ad28 100644 --- a/docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile +++ b/docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile @@ -1,4 +1,4 @@ -FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20211012.v1 +FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221129.v1 LABEL lightgbmbenchmark.linux.cpu.mpi.build.version="3.2.1/20211109.1" # Those arguments will NOT be used by AzureML @@ -49,7 +49,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/docker/lightgbm-v3.2.1/linux_cpu_mpi_pip.dockerfile b/docker/lightgbm-v3.2.1/linux_cpu_mpi_pip.dockerfile index 7f415f76..b42984f0 100644 --- a/docker/lightgbm-v3.2.1/linux_cpu_mpi_pip.dockerfile +++ b/docker/lightgbm-v3.2.1/linux_cpu_mpi_pip.dockerfile @@ -1,4 +1,4 @@ -FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210615.v1 +FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221129.v1 LABEL lightgbmbenchmark.linux.cpu.mpi.pip.version="3.2.1/20211108.1" # Those arguments will NOT be used by AzureML @@ -10,7 +10,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/docker/lightgbm-v3.2.1/linux_cuda_build.dockerfile b/docker/lightgbm-v3.2.1/linux_cuda_build.dockerfile index 1f85db8b..58104eb0 100644 --- a/docker/lightgbm-v3.2.1/linux_cuda_build.dockerfile +++ b/docker/lightgbm-v3.2.1/linux_cuda_build.dockerfile @@ -1,4 +1,4 @@ -FROM 
mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04 +FROM mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.6-cudnn8-ubuntu20.04:20221129.v1 LABEL lightgbmbenchmark.linux.cuda.build.version="3.2.1/20211108.1" # Those arguments will NOT be used by AzureML @@ -73,7 +73,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/docker/lightgbm-v3.2.1/linux_gpu_build.dockerfile b/docker/lightgbm-v3.2.1/linux_gpu_build.dockerfile index e3d5cc01..047f985a 100644 --- a/docker/lightgbm-v3.2.1/linux_gpu_build.dockerfile +++ b/docker/lightgbm-v3.2.1/linux_gpu_build.dockerfile @@ -1,4 +1,4 @@ -FROM mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04 +FROM mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.6-cudnn8-ubuntu20.04:20221129.v1 LABEL lightgbmbenchmark.linux.gpu.build.version="3.2.1/20211108.1" # Those arguments will NOT be used by AzureML @@ -10,7 +10,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/docker/lightgbm-v3.2.1/linux_gpu_pip.dockerfile b/docker/lightgbm-v3.2.1/linux_gpu_pip.dockerfile index a8e6dea7..4cba3571 100644 --- a/docker/lightgbm-v3.2.1/linux_gpu_pip.dockerfile +++ b/docker/lightgbm-v3.2.1/linux_gpu_pip.dockerfile @@ -1,4 +1,4 @@ -FROM mcr.microsoft.com/azureml/openmpi3.1.2-cuda10.1-cudnn7-ubuntu18.04 +FROM mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.6-cudnn8-ubuntu20.04:20221129.v1 LABEL lightgbmbenchmark.linux.gpu.pip.version="3.2.1/20211108.1" # Those arguments will NOT be used by AzureML # they are here just to allow for lightgbm-benchmark build to actually check @@ -9,7 +9,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/docker/lightgbm-v3.2.1/windows_cpu_pip.dockerfile b/docker/lightgbm-v3.2.1/windows_cpu_pip.dockerfile index cf9f2a6b..9e984465 100644 --- a/docker/lightgbm-v3.2.1/windows_cpu_pip.dockerfile +++ b/docker/lightgbm-v3.2.1/windows_cpu_pip.dockerfile @@ -10,7 +10,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment #ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/docker/lightgbm-v3.3.0/linux_cpu_mpi_build.dockerfile b/docker/lightgbm-v3.3.0/linux_cpu_mpi_build.dockerfile index 4f66f5a0..09b75851 100644 --- a/docker/lightgbm-v3.3.0/linux_cpu_mpi_build.dockerfile +++ b/docker/lightgbm-v3.3.0/linux_cpu_mpi_build.dockerfile @@ -1,4 +1,4 @@ -FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20211012.v1 +FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221129.v1 LABEL lightgbmbenchmark.linux.cpu.mpi.build.version="3.3.0/20211115.1" # Those arguments will NOT be used by AzureML @@ -49,7 +49,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p 
$AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/docker/lightgbm-v3.3.0/linux_cpu_mpi_pip.dockerfile b/docker/lightgbm-v3.3.0/linux_cpu_mpi_pip.dockerfile index 59ca3f32..e0d9056c 100644 --- a/docker/lightgbm-v3.3.0/linux_cpu_mpi_pip.dockerfile +++ b/docker/lightgbm-v3.3.0/linux_cpu_mpi_pip.dockerfile @@ -10,7 +10,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/docker/lightgbm-v3.3.0/windows_cpu_pip.dockerfile b/docker/lightgbm-v3.3.0/windows_cpu_pip.dockerfile index f40508e0..4d974d65 100644 --- a/docker/lightgbm-v3.3.0/windows_cpu_pip.dockerfile +++ b/docker/lightgbm-v3.3.0/windows_cpu_pip.dockerfile @@ -10,7 +10,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment #ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/src/scripts/data_processing/generate_data/conda_env.yaml b/src/scripts/data_processing/generate_data/conda_env.yaml index 223c34f7..d454ecdb 100644 --- a/src/scripts/data_processing/generate_data/conda_env.yaml +++ b/src/scripts/data_processing/generate_data/conda_env.yaml @@ -3,7 +3,7 @@ channels: - defaults dependencies: - python=3.8 -- pip=20.0 +- pip=22.2.2 - pip: - numpy==1.21.2 - scikit-learn==0.24.2 diff --git a/src/scripts/data_processing/lightgbm_data2bin/conda_env.yml b/src/scripts/data_processing/lightgbm_data2bin/conda_env.yml index 2c403f37..b7f65aba 100644 --- a/src/scripts/data_processing/lightgbm_data2bin/conda_env.yml +++ b/src/scripts/data_processing/lightgbm_data2bin/conda_env.yml @@ -3,7 +3,7 @@ channels: - defaults dependencies: - python=3.8 -- pip=20.0 +- pip=22.2.2 - pip: - numpy==1.21.2 - scikit-learn==0.24.2 diff --git a/src/scripts/data_processing/partition_data/conda_env.yml b/src/scripts/data_processing/partition_data/conda_env.yml index 39dabefc..395ab493 100644 --- a/src/scripts/data_processing/partition_data/conda_env.yml +++ b/src/scripts/data_processing/partition_data/conda_env.yml @@ -3,7 +3,7 @@ channels: - defaults dependencies: - python=3.8 -- pip=20.0 +- pip=22.2.2 - pip: - numpy==1.21.2 - scikit-learn==0.24.2 diff --git a/src/scripts/inferencing/custom_win_cli/conda_env.yaml b/src/scripts/inferencing/custom_win_cli/conda_env.yaml index 78eed94f..cd181b8e 100644 --- a/src/scripts/inferencing/custom_win_cli/conda_env.yaml +++ b/src/scripts/inferencing/custom_win_cli/conda_env.yaml @@ -3,7 +3,7 @@ channels: - defaults dependencies: - python=3.8 -- pip=20.0 +- pip=22.2.2 - pip: - azureml-defaults==1.35.0 - azureml-mlflow==1.35.0 diff --git a/src/scripts/inferencing/lightgbm_c_api/default.dockerfile b/src/scripts/inferencing/lightgbm_c_api/default.dockerfile index f50022c5..4f22ca78 100644 --- a/src/scripts/inferencing/lightgbm_c_api/default.dockerfile +++ b/src/scripts/inferencing/lightgbm_c_api/default.dockerfile @@ -1,4 +1,4 @@ -FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20211012.v1 +FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221129.v1 LABEL lightgbmbenchmark.linux.cpu.mpi.build.version="3.3.0/20211115.1" # Those arguments will NOT be 
used by AzureML @@ -49,7 +49,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/src/scripts/inferencing/lightgbm_python/default.dockerfile b/src/scripts/inferencing/lightgbm_python/default.dockerfile index 419a5444..6ceda711 100644 --- a/src/scripts/inferencing/lightgbm_python/default.dockerfile +++ b/src/scripts/inferencing/lightgbm_python/default.dockerfile @@ -1,4 +1,4 @@ -FROM mcr.microsoft.com/azureml/openmpi3.1.2-ubuntu18.04:20210615.v1 +FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221129.v1 LABEL lightgbmbenchmark.linux.cpu.mpi.pip.version="3.3.0/20211210.1" # Those arguments will NOT be used by AzureML @@ -10,7 +10,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/src/scripts/inferencing/lightgbm_ray/default.dockerfile b/src/scripts/inferencing/lightgbm_ray/default.dockerfile index 67a87a2b..4cea04e3 100644 --- a/src/scripts/inferencing/lightgbm_ray/default.dockerfile +++ b/src/scripts/inferencing/lightgbm_ray/default.dockerfile @@ -5,15 +5,15 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH # Install pip dependencies RUN HOROVOD_WITH_TENSORFLOW=1 \ - pip install 'pandas>=1.1,<1.2' \ - 'numpy>=1.10,<1.20' \ + pip install 'pandas==1.5.2' \ + 'numpy==1.23.5' \ 'matplotlib==3.4.3' \ 'scipy~=1.5.0' \ 'scikit-learn~=0.24.1' \ @@ -23,4 +23,5 @@ RUN HOROVOD_WITH_TENSORFLOW=1 \ 'azureml-telemetry==1.35.0' \ 'mpi4py==3.1.1' \ 'ray==1.9.2' \ + 'protobuf==3.20' \ 'lightgbm-ray==0.1.2' diff --git a/src/scripts/inferencing/treelite_python/conda_env.yaml b/src/scripts/inferencing/treelite_python/conda_env.yaml index b31a7368..3d04b774 100644 --- a/src/scripts/inferencing/treelite_python/conda_env.yaml +++ b/src/scripts/inferencing/treelite_python/conda_env.yaml @@ -3,7 +3,7 @@ channels: - defaults dependencies: - python=3.8 -- pip=20.0 +- pip=22.2.2 - pip: - azureml-defaults==1.35.0 - azureml-mlflow==1.35.0 diff --git a/src/scripts/model_transformation/treelite_compile/conda_env.yaml b/src/scripts/model_transformation/treelite_compile/conda_env.yaml index b31a7368..3d04b774 100644 --- a/src/scripts/model_transformation/treelite_compile/conda_env.yaml +++ b/src/scripts/model_transformation/treelite_compile/conda_env.yaml @@ -3,7 +3,7 @@ channels: - defaults dependencies: - python=3.8 -- pip=20.0 +- pip=22.2.2 - pip: - azureml-defaults==1.35.0 - azureml-mlflow==1.35.0 diff --git a/src/scripts/sample/conda_env.yaml b/src/scripts/sample/conda_env.yaml index 0201788d..be0745eb 100644 --- a/src/scripts/sample/conda_env.yaml +++ b/src/scripts/sample/conda_env.yaml @@ -3,7 +3,7 @@ channels: - defaults dependencies: - python=3.8 -- pip=20.0 +- pip=22.2.2 - pip: - azureml-defaults==1.35.0 - azureml-mlflow==1.35.0 diff --git a/src/scripts/training/lightgbm_python/default.dockerfile b/src/scripts/training/lightgbm_python/default.dockerfile index 6848faf6..8c12a696 100644 
--- a/src/scripts/training/lightgbm_python/default.dockerfile +++ b/src/scripts/training/lightgbm_python/default.dockerfile @@ -31,7 +31,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/src/scripts/training/lightgbm_ray/default.dockerfile b/src/scripts/training/lightgbm_ray/default.dockerfile index 67a87a2b..0f0db1aa 100644 --- a/src/scripts/training/lightgbm_ray/default.dockerfile +++ b/src/scripts/training/lightgbm_ray/default.dockerfile @@ -5,7 +5,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/src/scripts/training/ray_tune/default.dockerfile b/src/scripts/training/ray_tune/default.dockerfile index fcf659e6..350d9fd6 100644 --- a/src/scripts/training/ray_tune/default.dockerfile +++ b/src/scripts/training/ray_tune/default.dockerfile @@ -7,7 +7,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH diff --git a/src/scripts/training/ray_tune_distributed/default.dockerfile b/src/scripts/training/ray_tune_distributed/default.dockerfile index 5bb04a04..acba0893 100644 --- a/src/scripts/training/ray_tune_distributed/default.dockerfile +++ b/src/scripts/training/ray_tune_distributed/default.dockerfile @@ -7,7 +7,7 @@ ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm # Create conda environment RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \ - python=3.8 pip=20.2.4 + python=3.8 pip=22.2.2 # Prepend path to AzureML conda environment ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH From 3a36ce64c8e060c806690e9eb9286916fe315c94 Mon Sep 17 00:00:00 2001 From: David Majercak Date: Mon, 12 Dec 2022 13:12:28 -0800 Subject: [PATCH 18/20] update ort docker deps --- src/scripts/inferencing/lightgbm_ort/default.dockerfile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/scripts/inferencing/lightgbm_ort/default.dockerfile b/src/scripts/inferencing/lightgbm_ort/default.dockerfile index f76ac583..caec0a91 100644 --- a/src/scripts/inferencing/lightgbm_ort/default.dockerfile +++ b/src/scripts/inferencing/lightgbm_ort/default.dockerfile @@ -1,5 +1,4 @@ FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest -LABEL lightgbmbenchmark.linux.cpu.ray.version="0.1.2/20220111.1" ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm @@ -30,4 +29,4 @@ RUN HOROVOD_WITH_TENSORFLOW=1 \ # install lightgbm with mpi RUN pip install lightgbm==${lightgbm_version} \ - pip install 'protobuf==3.20' \ No newline at end of file + pip install 'protobuf==3.20.3' \ No newline at end of file From c86e0982c68180ce2ab2fd99dc929af15139dc24 Mon Sep 17 00:00:00 2001 From: David Majercak Date: Mon, 12 Dec 2022 13:14:09 -0800 Subject: [PATCH 19/20] update ray docker deps --- src/scripts/inferencing/lightgbm_ray/default.dockerfile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/scripts/inferencing/lightgbm_ray/default.dockerfile 
index 4cea04e3..10b639dd 100644
--- a/src/scripts/inferencing/lightgbm_ray/default.dockerfile
+++ b/src/scripts/inferencing/lightgbm_ray/default.dockerfile
@@ -22,6 +22,6 @@ RUN HOROVOD_WITH_TENSORFLOW=1 \
     'azureml-mlflow==1.35.0' \
     'azureml-telemetry==1.35.0' \
     'mpi4py==3.1.1' \
-    'ray==1.9.2' \
-    'protobuf==3.20' \
-    'lightgbm-ray==0.1.2'
+    'protobuf==3.20.3' \
+    'ray==2.1.0' \
+    'lightgbm-ray==0.1.8'

From 75103fdff287032d77fc31b8b8f2104b710244a2 Mon Sep 17 00:00:00 2001
From: David Majercak
Date: Tue, 13 Dec 2022 23:54:13 -0800
Subject: [PATCH 20/20] only essential python packages

---
 .../lightgbm_ort/default.dockerfile           | 21 +++-----
 src/scripts/inferencing/lightgbm_ort/score.py | 49 +++++++------------
 2 files changed, 24 insertions(+), 46 deletions(-)

diff --git a/src/scripts/inferencing/lightgbm_ort/default.dockerfile b/src/scripts/inferencing/lightgbm_ort/default.dockerfile
index caec0a91..e161bfbb 100644
--- a/src/scripts/inferencing/lightgbm_ort/default.dockerfile
+++ b/src/scripts/inferencing/lightgbm_ort/default.dockerfile
@@ -1,8 +1,8 @@
-FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:latest
+FROM mcr.microsoft.com/azureml/openmpi4.1.0-ubuntu20.04:20221129.v1
 
 ENV AZUREML_CONDA_ENVIRONMENT_PATH /azureml-envs/lightgbm
 
-ARG lightgbm_version="3.3.0"
+ARG lightgbm_version="3.3.3"
 
 # Create conda environment
 RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
@@ -12,20 +12,13 @@ RUN conda create -p $AZUREML_CONDA_ENVIRONMENT_PATH \
 ENV PATH $AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH
 
 # Install pip dependencies
-RUN HOROVOD_WITH_TENSORFLOW=1 \
-    pip install 'pandas==1.5.2' \
-    'numpy==1.23.5' \
+RUN pip install 'numpy==1.23.5' \
     'matplotlib==3.6.2' \
-    'scipy~=1.5.0' \
-    'scikit-learn~=0.24.1' \
-    'azureml-core==1.35.0' \
-    'azureml-defaults==1.35.0' \
-    'azureml-mlflow==1.35.0' \
-    'azureml-telemetry==1.35.0' \
-    'mpi4py==3.1.1' \
-    'onnxruntime==1.12.1' \
+    'psutil==5.9.4'\
+    'azureml-mlflow==1.48.0' \
+    'onnxruntime==1.13.1' \
     'onnxmltools==1.11.1' \
-    'onnxconverter-common==1.12.2'
+    'onnxconverter-common==1.13.0'
 
 # install lightgbm with mpi
 RUN pip install lightgbm==${lightgbm_version} \
diff --git a/src/scripts/inferencing/lightgbm_ort/score.py b/src/scripts/inferencing/lightgbm_ort/score.py
index b5ee24be..f6e585c9 100644
--- a/src/scripts/inferencing/lightgbm_ort/score.py
+++ b/src/scripts/inferencing/lightgbm_ort/score.py
@@ -6,9 +6,7 @@
 """
 import os
 import sys
-import argparse
 import logging
-import time
 import timeit
 import numpy as np
 from distutils.util import strtobool
@@ -155,34 +153,23 @@ def run(self, args, logger, metrics_logger, unknown_args):
         assert args.data_format == "CSV"
         with metrics_logger.log_time_block("time_data_loading"):
             # NOTE: this is bad, but allows for libsvm format (not just numpy)
-            inference_data = lightgbm.Dataset(
-                args.data, free_raw_data=False
-            ).construct()
-            inference_raw_data = inference_data.get_data()
-            if type(inference_raw_data) == str:
-                inference_raw_data = np.loadtxt(
-                    inference_raw_data, delimiter=","
-                ).astype(np.float32)[:, : inference_data.num_feature()]
+            # inference_data = lightgbm.Dataset(
+            #     args.data, free_raw_data=False
+            # ).construct()
+            # inference_raw_data = inference_data.get_data()
+            # if type(inference_raw_data) == str:
+            inference_raw_data = np.loadtxt(
+                args.data, delimiter=","
+            ).astype(np.float32)[:, : booster.num_feature()]
 
         logger.info(f"Converting model to ONNX")
         onnx_input_types = [
             (
                 "input",
-                FloatTensorType([1, inference_data.num_feature()]),
-            )
-        ]
-        onnx_batch_input_types = [
-            (
-                "input",
-                FloatTensorType(
-                    [inference_data.num_data(), inference_data.num_feature()]
-                ),
+                FloatTensorType([None, inference_raw_data.shape[1]]),
             )
         ]
         onnx_ml_model = convert_lightgbm(booster, initial_types=onnx_input_types)
-        onnx_ml_batch_model = convert_lightgbm(
-            booster, initial_types=onnx_batch_input_types
-        )
 
         logger.info(f"Creating inference session")
         sess_options = ort.SessionOptions()
@@ -206,14 +193,11 @@ def run(self, args, logger, metrics_logger, unknown_args):
             sessionml = ort.InferenceSession(
                 onnx_ml_model.SerializeToString(), sess_options
             )
-            sessionml_batch = ort.InferenceSession(
-                onnx_ml_batch_model.SerializeToString(), sess_options
-            )
 
         # capture data shape as property
         metrics_logger.set_properties(
-            inference_data_length=inference_data.num_data(),
-            inference_data_width=inference_data.num_feature(),
+            inference_data_length=inference_raw_data.shape[0],
+            inference_data_width=inference_raw_data.shape[1],
         )
 
         logger.info(f"Running .predict()")
@@ -223,8 +207,9 @@ def run(self, args, logger, metrics_logger, unknown_args):
             sessionml.run(
                 [sessionml.get_outputs()[0].name],
                 {sessionml.get_inputs()[0].name: inference_raw_data[0:1]},
-            )[0]
-            predictions_array = sessionml_batch.run(
+            )
+
+            predictions_array = sessionml.run(
                 [sessionml.get_outputs()[0].name],
                 {sessionml.get_inputs()[0].name: inference_raw_data},
             )[0]
@@ -235,9 +220,9 @@ def run(self, args, logger, metrics_logger, unknown_args):
         if args.run_batch:
             batch_length = len(inference_raw_data)
             prediction_time = timeit.timeit(
-                lambda: sessionml_batch.run(
-                    [sessionml_batch.get_outputs()[0].name],
-                    {sessionml_batch.get_inputs()[0].name: inference_raw_data},
+                lambda: sessionml.run(
+                    [sessionml.get_outputs()[0].name],
+                    {sessionml.get_inputs()[0].name: inference_raw_data},
                 ),
                 number=timeit_loops,
             )
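
Note on the dependency commits: PATCH 17-19 converge every image and conda environment on the same pins (pip 22.2.2, numpy 1.23.5, protobuf 3.20.3), and PATCH 20/20 trims the ort image down to what score.py actually imports. A quick way to confirm a built image matches its pins is to compare installed versions against a table copied from the dockerfile. The sketch below is illustrative only (not part of the patches); the EXPECTED table mirrors the lightgbm_ort dockerfile after PATCH 20/20, and importlib.metadata is stdlib in the pinned Python 3.8.

    from importlib.metadata import version  # stdlib, Python 3.8+

    # expected pins, copied from src/scripts/inferencing/lightgbm_ort/default.dockerfile
    EXPECTED = {
        "numpy": "1.23.5",
        "psutil": "5.9.4",
        "onnxruntime": "1.13.1",
        "onnxmltools": "1.11.1",
        "onnxconverter-common": "1.13.0",
        "protobuf": "3.20.3",
    }

    for pkg, want in EXPECTED.items():
        got = version(pkg)  # raises PackageNotFoundError if pkg is absent
        print(f"{pkg}=={got}", "ok" if got == want else f"MISMATCH, expected {want}")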
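
Note on the score.py change in PATCH 20/20: the two fixed-shape conversions (a [1, n] model for single-row scoring and a [num_data, n] model for batch scoring) collapse into one model whose batch axis is dynamic, FloatTensorType([None, n]), so a single InferenceSession serves both paths. A minimal end-to-end sketch of that pattern follows; the toy dataset, parameters, and variable names are illustrative, not taken from the benchmark code.

    import numpy as np
    import lightgbm
    import onnxruntime as ort
    from onnxmltools.convert import convert_lightgbm
    from onnxconverter_common.data_types import FloatTensorType

    # train a small throwaway booster on random data
    X = np.random.rand(256, 10).astype(np.float32)
    y = np.random.rand(256)
    booster = lightgbm.train(
        {"objective": "regression", "verbose": -1},
        lightgbm.Dataset(X, label=y),
        num_boost_round=10,
    )

    # dynamic batch axis: None instead of a fixed row count
    initial_types = [("input", FloatTensorType([None, booster.num_feature()]))]
    onnx_model = convert_lightgbm(booster, initial_types=initial_types)

    sess = ort.InferenceSession(
        onnx_model.SerializeToString(), providers=["CPUExecutionProvider"]
    )
    input_name = sess.get_inputs()[0].name
    output_name = sess.get_outputs()[0].name

    # the same session now scores one row or the whole batch
    single_row = sess.run([output_name], {input_name: X[0:1]})[0]
    full_batch = sess.run([output_name], {input_name: X})[0]

One serialized model instead of two also halves the conversion and session-creation work, at the possible cost of shape-specific runtime optimizations.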
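
The timing code keeps its shape after the change: one discarded single-row run to warm the session up, then timeit over the batch run. Reusing sess, X, input_name, and output_name from the sketch above, per-row latency can be derived as follows (timeit_loops is an arbitrary illustrative value, not the benchmark's setting).

    import timeit

    # warm-up: the first run can pay one-time initialization costs, so discard it
    sess.run([output_name], {input_name: X[0:1]})

    timeit_loops = 10
    batch_seconds = timeit.timeit(
        lambda: sess.run([output_name], {input_name: X}),
        number=timeit_loops,
    ) / timeit_loops

    per_row_usecs = batch_seconds / len(X) * 1e6
    print(f"batch: {batch_seconds:.6f} s, per row: {per_row_usecs:.2f} us")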