diff --git a/pipelines/azureml/conf/experiments/benchmarks/lightgbm-inferencing.yaml b/pipelines/azureml/conf/experiments/benchmarks/lightgbm-inferencing.yaml
index dfe29347..abd67da4 100644
--- a/pipelines/azureml/conf/experiments/benchmarks/lightgbm-inferencing.yaml
+++ b/pipelines/azureml/conf/experiments/benchmarks/lightgbm-inferencing.yaml
@@ -28,7 +28,7 @@ module_loader: # module loading params
 
 lightgbm_inferencing: # name of your particular benchmark
-  benchmark_name: "benchmark-inferencing-20211109.3" # need to be provided at runtime!
+  benchmark_name: "benchmark-inferencing-20211124.1" # need to be provided at runtime!
 
   tasks:
     - data:
@@ -82,11 +82,19 @@ lightgbm_inferencing:
   variants:
     - framework: lightgbm_python # v3.3.0 via pypi
+      num_threads: 1
+    - framework: lightgbm_c_api # v3.3.0 with C API prediction
+    - framework: lightgbm_c_api # v3.3.0 with C API prediction
       build: docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile
+    - framework: lightgbm_c_api # v3.2.1 with C API prediction
       build: docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile
+    - framework: lightgbm_c_api # v3.2.1 with C API prediction
       build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile
+    - framework: treelite_python # v1.3.0
+      num_threads: 1
+      batch_size: 0 # use whole file as batch
diff --git a/pipelines/azureml/conf/experiments/lightgbm-inferencing.yaml b/pipelines/azureml/conf/experiments/lightgbm-inferencing.yaml
index 79712320..9647e538 100644
--- a/pipelines/azureml/conf/experiments/lightgbm-inferencing.yaml
+++ b/pipelines/azureml/conf/experiments/lightgbm-inferencing.yaml
@@ -43,12 +43,21 @@ lightgbm_inferencing:
   # list all inferencing frameworks and their builds
   variants:
     - framework: lightgbm_python # v3.3.0 via pypi
+      num_threads: 1
+    - framework: lightgbm_c_api # v3.3.0 with C API prediction
+    - framework: lightgbm_c_api # v3.3.0 with C API prediction
       build: docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile
+    - framework: lightgbm_c_api # v3.2.1 with C API prediction
       build: docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile
+    - framework: lightgbm_c_api # v3.2.1 with C API prediction
       build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile
+    - framework: treelite_python # v1.3.0
+      num_threads: 1
+      batch_size: 0 # use whole file as batch
+
diff --git a/pipelines/azureml/pipelines/lightgbm_inferencing.py b/pipelines/azureml/pipelines/lightgbm_inferencing.py
index dca94266..e4091d57 100644
--- a/pipelines/azureml/pipelines/lightgbm_inferencing.py
+++ b/pipelines/azureml/pipelines/lightgbm_inferencing.py
@@ -130,6 +130,8 @@ def lightgbm_inferencing_pipeline_function(benchmark_custom_properties, data, mo
         inferencing_step = treelite_score_module(
             data = data,
             compiled_model = treelite_compile_step.outputs.compiled_model,
+            num_threads = variant.num_threads,
+            batch_size = variant.batch_size,
             verbose = False,
             custom_properties = custom_properties
         )
@@ -140,6 +142,8 @@ def lightgbm_inferencing_pipeline_function(benchmark_custom_properties, data, mo
         inferencing_step = lightgbm_c_api_score_module(
             data = data,
             model = model,
+            num_threads = variant.num_threads,
+            # batch_size = variant.batch_size, # not supported yet
             predict_disable_shape_check = predict_disable_shape_check,
             verbose = False,
             custom_properties = custom_properties
@@ -151,6 +155,8 @@ def lightgbm_inferencing_pipeline_function(benchmark_custom_properties, data, mo
         inferencing_step = lightgbm_cli_score_module(
             data = data,
             model = model,
+            num_threads = variant.num_threads,
+            # batch_size = variant.batch_size, # not supported yet
             predict_disable_shape_check = predict_disable_shape_check,
             verbose = False,
             custom_properties = custom_properties
@@ -162,6 +168,8 @@ def lightgbm_inferencing_pipeline_function(benchmark_custom_properties, data, mo
         inferencing_step = lightgbm_python_score_module(
             data = data,
             model = model,
+            num_threads = variant.num_threads,
+            # batch_size = variant.batch_size, # not supported yet
             predict_disable_shape_check = predict_disable_shape_check,
             verbose = False,
             custom_properties = custom_properties
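Note on the pipeline wiring above: every framework branch now forwards the variant's num_threads, but only the treelite branch also forwards batch_size; for the three lightgbm modules the batch_size argument stays commented out until their scripts support batching. A minimal sketch of that dispatch pattern (build_step and score_modules are illustrative names, not part of the pipeline code; treelite actually takes compiled_model rather than model):

    def build_step(variant, data, model, score_modules):
        # illustrative only: forward per-variant knobs to the matching module
        kwargs = dict(data=data, model=model, num_threads=variant.num_threads)
        if variant.framework == "treelite_python":
            # treelite is the only module wired for batching so far
            kwargs["batch_size"] = variant.batch_size
        return score_modules[variant.framework](**kwargs)
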
diff --git a/src/common/io.py b/src/common/io.py
index 84898249..2e8317ad 100644
--- a/src/common/io.py
+++ b/src/common/io.py
@@ -8,6 +8,7 @@
 import os
 import argparse
 import logging
+import csv
 
 def input_file_path(path):
     """ Argparse type to resolve input path as single file from directory.
@@ -225,3 +226,42 @@ def run(self, input_path, output_path):
             self.split_by_append(input_files, output_path, self.number)
         else:
             raise NotImplementedError(f"Mode {self.mode} not implemented.")
+
+
+class CustomLightGBMDataBatchIterator():
+    def __init__(self, file_path, batch_size=0, file_format="csv", **kwargs):
+        self.file_path = file_path
+        self.batch_size = batch_size
+        self.file_format = file_format
+        self.reader_options = kwargs # passed through to csv.reader
+
+    def iter(self):
+        if self.file_format == "csv":
+            with open(self.file_path, "r") as i_file:
+                reader = csv.reader(i_file, **self.reader_options)
+
+                batch = []
+                if self.batch_size == 0:
+                    # use the entire file as a batch
+                    batch = [
+                        [
+                            float(col) for col in row # convert all values to float for lightgbm
+                        ] for row in reader
+                    ]
+                elif self.batch_size >= 1:
+                    # create batches of batch_size rows each
+                    for row in reader:
+                        batch.append(
+                            [ float(col) for col in row ] # convert all values to float for lightgbm
+                        )
+                        if len(batch) >= self.batch_size:
+                            yield batch
+                            batch = [] # reset batch
+                else:
+                    raise ValueError("batch_size must be >= 0")
+
+                # any remaining batch, or the whole file
+                if len(batch) > 0:
+                    yield batch
+        else:
+            raise NotImplementedError(f"file_format={self.file_format} is not implemented yet.")
diff --git a/src/common/tasks.py b/src/common/tasks.py
index 616f1172..d8b5d39b 100644
--- a/src/common/tasks.py
+++ b/src/common/tasks.py
@@ -24,10 +24,15 @@ class inferencing_task:
 
 @dataclass
 class inferencing_variants:
+    # framework
     framework: str = MISSING
     build: Optional[str] = None
     os: str = "Linux" # linux or windows, linux by default
 
+    # parameters
+    batch_size: int = 0 # use whole file as batch
+    num_threads: int = 1 # use only one thread
+
 @dataclass
 class data_generation_task:
     task: str = MISSING
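CustomLightGBMDataBatchIterator.iter() yields lists of float rows and passes any extra keyword arguments through to csv.reader; batch_size=0 (the default, matching the dataclass above) yields the whole file as a single batch. A short usage sketch, assuming sample.csv is a placeholder for a headerless, comma-separated file of numeric features:

    from common.io import CustomLightGBMDataBatchIterator

    # yields lists of rows, each row a list of floats
    for batch in CustomLightGBMDataBatchIterator("sample.csv", batch_size=1024).iter():
        print(len(batch), len(batch[0]))  # rows in this batch, features per row

    # batch_size=0 would instead yield one batch containing every row in the file
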
diff --git a/src/scripts/inferencing/lightgbm_c_api/spec.yaml b/src/scripts/inferencing/lightgbm_c_api/spec.yaml
index 59d9bfad..fd664142 100644
--- a/src/scripts/inferencing/lightgbm_c_api/spec.yaml
+++ b/src/scripts/inferencing/lightgbm_c_api/spec.yaml
@@ -18,7 +18,7 @@ inputs:
     type: Boolean
     description: "control whether or not LightGBM raises an error when you try to predict on data with a different number of features than the training data"
     default: False
-  n_threads:
+  num_threads:
     type: Integer
     default: 1
   verbose:
@@ -37,7 +37,7 @@ command: >-
   python score.py
   --data {inputs.data}
   --model {inputs.model}
-  --num_threads {inputs.n_threads}
+  --num_threads {inputs.num_threads}
   --output {outputs.predictions}
   --predict_disable_shape_check {inputs.predict_disable_shape_check}
   --verbose {inputs.verbose}
diff --git a/src/scripts/inferencing/lightgbm_cli/spec.yaml b/src/scripts/inferencing/lightgbm_cli/spec.yaml
index 9b51d5ab..41bfa111 100644
--- a/src/scripts/inferencing/lightgbm_cli/spec.yaml
+++ b/src/scripts/inferencing/lightgbm_cli/spec.yaml
@@ -18,9 +18,9 @@ inputs:
     type: Boolean
     description: "control whether or not LightGBM raises an error when you try to predict on data with a different number of features than the training data"
     optional: true
-  n_threads:
+  num_threads:
     type: Integer
-    optional: true
+    default: 1
   lightgbm_exec_path:
     type: String
     optional: true
@@ -37,7 +37,7 @@ command: >-
   python score.py
   --data {inputs.data}
   --model {inputs.model}
-  [--num_threads {inputs.n_threads}]
+  --num_threads {inputs.num_threads}
   [--lightgbm_exec_path {inputs.lightgbm_exec_path}]
   [--predict_disable_shape_check {inputs.predict_disable_shape_check}]
   [--verbose {inputs.verbose}]
diff --git a/src/scripts/inferencing/lightgbm_python/spec.yaml b/src/scripts/inferencing/lightgbm_python/spec.yaml
index 7fcbebca..cf40e260 100644
--- a/src/scripts/inferencing/lightgbm_python/spec.yaml
+++ b/src/scripts/inferencing/lightgbm_python/spec.yaml
@@ -18,9 +18,9 @@ inputs:
     type: Boolean
     description: "control whether or not LightGBM raises an error when you try to predict on data with a different number of features than the training data"
     default: False
-  n_threads:
+  num_threads:
     type: Integer
-    optional: true
+    default: 1
   verbose:
     type: Boolean
     default: False
@@ -38,7 +38,7 @@ command: >-
   --data {inputs.data}
   --model {inputs.model}
   --output {outputs.predictions}
-  [--num_threads {inputs.n_threads}]
+  --num_threads {inputs.num_threads}
   --predict_disable_shape_check {inputs.predict_disable_shape_check}
   --verbose {inputs.verbose}
   [--custom_properties {inputs.custom_properties}]
diff --git a/src/scripts/inferencing/treelite_python/conda_env.yaml b/src/scripts/inferencing/treelite_python/conda_env.yaml
index b31a7368..0d08ea4c 100644
--- a/src/scripts/inferencing/treelite_python/conda_env.yaml
+++ b/src/scripts/inferencing/treelite_python/conda_env.yaml
@@ -12,3 +12,4 @@ dependencies:
   - treelite_runtime==2.1.0
   - pandas>=1.1,<1.2
   - numpy>=1.10,<1.20
+  - matplotlib==3.4.3
diff --git a/src/scripts/inferencing/treelite_python/score.py b/src/scripts/inferencing/treelite_python/score.py
index 26fef4bc..e846c9a5 100644
--- a/src/scripts/inferencing/treelite_python/score.py
+++ b/src/scripts/inferencing/treelite_python/score.py
@@ -8,9 +8,9 @@
 import sys
 import argparse
 import logging
-import numpy
+import time
+import numpy as np
 from distutils.util import strtobool
-import pandas as pd
 import treelite, treelite_runtime
 
 # Add the right path to PYTHONPATH
@@ -18,20 +18,20 @@
 COMMON_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
 if COMMON_ROOT not in sys.path:
-    print(f"Adding {COMMON_ROOT} to PYTHONPATH")
+    logging.info(f"Adding {COMMON_ROOT} to PYTHONPATH")
     sys.path.append(str(COMMON_ROOT))
 
 # useful imports from common
 from common.components import RunnableScript
-from common.io import input_file_path
+from common.io import input_file_path, CustomLightGBMDataBatchIterator
 
 
 class TreeLightInferencingScript(RunnableScript):
     def __init__(self):
         super().__init__(
-            task = 'score',
+            task = "score",
             framework = 'treelite_python',
-            framework_version = treelite.__version__
+            framework_version = "PYTHON_API." + str(treelite.__version__)
         )
 
     @classmethod
@@ -61,6 +61,8 @@ def get_arg_parser(cls, parser=None):
         group_params = parser.add_argument_group("Scoring parameters")
         group_params.add_argument("--num_threads",
             required=False, default=1, type=int, help="number of threads")
+        group_params.add_argument("--batch_size",
+            required=False, default=0, type=int, help="size of batches for predict call")
 
         return parser
 
@@ -76,31 +78,66 @@
         """
         # record relevant parameters
        metrics_logger.log_parameters(
-            num_threads=args.num_threads
+            num_threads=args.num_threads,
+            batch_size=args.batch_size,
         )
 
+        # make sure the output argument exists
         if args.output:
-            # make sure the output argument exists
             os.makedirs(args.output, exist_ok=True)
 
             # and create your own file inside the output
             args.output = os.path.join(args.output, "predictions.txt")
 
+        logger.info(f"Loading model from {args.so_path}")
+        predictor = treelite_runtime.Predictor(
+            args.so_path,
+            verbose=True,
+            nthread=args.num_threads
+        )
 
-        logger.info(f"Loading data for inferencing")
-        with metrics_logger.log_time_block("time_data_loading"):
-            my_data = pd.read_csv(args.data).to_numpy()
-
-        predictor = treelite_runtime.Predictor(
-            args.so_path,
-            verbose=True,
-            nthread=args.num_threads
-        )
-        dmat = treelite_runtime.DMatrix(my_data)
+        # accumulate predictions and per-batch latencies
+        predictions = []
+        time_inferencing_per_batch = []
+        batch_lengths = []
+
+        # loop through batches
+        for batch in CustomLightGBMDataBatchIterator(args.data, batch_size=args.batch_size, file_format="csv").iter():
+            if len(batch) == 0:
+                break
+            batch_lengths.append(len(batch))
+
+            # transform into dense matrix for treelite
+            batch_data = np.array(batch)
+            batch_dmat = treelite_runtime.DMatrix(batch_data)
+
+            # run prediction on batch
+            batch_start_time = time.monotonic()
+            predictions.extend(predictor.predict(batch_dmat))
+            time_inferencing_per_batch.append(time.monotonic() - batch_start_time) # seconds
+
+        # log overall time
+        metrics_logger.log_metric("time_inferencing", sum(time_inferencing_per_batch))
+
+        # use helper to log latency with the right metric names
+        metrics_logger.log_inferencing_latencies(
+            time_inferencing_per_batch,
+            batch_length=batch_lengths,
+            factor_to_usecs=1000000.0 # values above are in seconds
+        )
 
-        logger.info(f"Running .predict()")
-        with metrics_logger.log_time_block("time_inferencing"):
-            predictor.predict(dmat)
+        if args.output:
+            np.savetxt(
+                args.output,
+                predictions,
+                fmt='%f',
+                delimiter=',',
+                newline='\n',
+                header='',
+                footer='',
+                comments='# ',
+                encoding=None
+            )
 
 
 def get_arg_parser(parser=None):
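In the new run() above, per-batch wall-clock times are collected in seconds and handed to metrics_logger.log_inferencing_latencies together with the batch lengths and factor_to_usecs=1000000.0. A rough sketch of the kind of per-prediction aggregation such a helper can derive from those inputs (summarize_latencies and the metric names are assumptions for illustration, not the helper's actual implementation):

    import numpy as np

    def summarize_latencies(times_per_batch, batch_lengths, factor_to_usecs=1000000.0):
        # convert per-batch seconds into per-prediction microseconds
        per_prediction_usecs = (
            np.array(times_per_batch) * factor_to_usecs / np.array(batch_lengths)
        )
        return {
            "latency_avg_usecs": float(np.mean(per_prediction_usecs)),
            "latency_p50_usecs": float(np.percentile(per_prediction_usecs, 50)),
            "latency_p99_usecs": float(np.percentile(per_prediction_usecs, 99)),
        }
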
diff --git a/src/scripts/inferencing/treelite_python/spec.yaml b/src/scripts/inferencing/treelite_python/spec.yaml
index 5e9b18f6..d1e7a804 100644
--- a/src/scripts/inferencing/treelite_python/spec.yaml
+++ b/src/scripts/inferencing/treelite_python/spec.yaml
@@ -15,9 +15,12 @@ inputs:
     type: AnyDirectory
     description: directory to the model
     optional: false
-  n_threads:
+  num_threads:
     type: Integer
-    optional: true
+    default: 1
+  batch_size:
+    type: Integer
+    default: 0 # default: use whole file as a batch
   verbose:
     type: Boolean
     default: False
@@ -31,7 +34,8 @@ command: >-
   python score.py
   --data {inputs.data}
   --so_path {inputs.compiled_model}
-  [--num_threads {inputs.n_threads}]
+  --num_threads {inputs.num_threads}
+  --batch_size {inputs.batch_size}
   --verbose {inputs.verbose}
   [--custom_properties {inputs.custom_properties}]
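Taken together, the treelite scoring path now loads the compiled model once and scores dense batches against it. A minimal standalone sketch of the same treelite_runtime calls, assuming a compiled model at ./model.so and a headerless numeric CSV at ./data.csv (both placeholder paths):

    import numpy as np
    import treelite_runtime

    # load the compiled model once, pinned to a single thread
    predictor = treelite_runtime.Predictor("./model.so", verbose=True, nthread=1)

    # batch_size=0 behavior: score the whole file as one dense batch
    data = np.loadtxt("./data.csv", delimiter=",", ndmin=2)
    predictions = predictor.predict(treelite_runtime.DMatrix(data))
    np.savetxt("./predictions.txt", predictions, fmt="%f", delimiter=",")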