This repository was archived by the owner on Apr 8, 2024. It is now read-only.
@@ -28,7 +28,7 @@ module_loader: # module loading params
 
 lightgbm_inferencing:
   # name of your particular benchmark
-  benchmark_name: "benchmark-inferencing-20211109.3" # need to be provided at runtime!
+  benchmark_name: "benchmark-inferencing-20211124.1" # need to be provided at runtime!
 
   tasks:
     - data:
@@ -82,11 +82,19 @@ lightgbm_inferencing:

   variants:
     - framework: lightgbm_python # v3.3.0 via pypi
+      num_threads: 1
 
     - framework: lightgbm_c_api # v3.3.0 with C API prediction
 
+    - framework: lightgbm_c_api # v3.3.0 with C API prediction
+      build: docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile
+
     - framework: lightgbm_c_api # v3.2.1 with C API prediction
       build: docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile
 
+    - framework: lightgbm_c_api # v3.2.1 with C API prediction
+      build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile
+
     - framework: treelite_python # v1.3.0
+      num_threads: 1
       batch_size: 0 # use whole file as batch
9 changes: 9 additions & 0 deletions pipelines/azureml/conf/experiments/lightgbm-inferencing.yaml
@@ -43,12 +43,21 @@ lightgbm_inferencing:
   # list all inferencing frameworks and their builds
   variants:
     - framework: lightgbm_python # v3.3.0 via pypi
+      num_threads: 1
 
     - framework: lightgbm_c_api # v3.3.0 with C API prediction
 
+    - framework: lightgbm_c_api # v3.3.0 with C API prediction
+      build: docker/lightgbm-custom/v330_patch_cpu_mpi_build.dockerfile
+
     - framework: lightgbm_c_api # v3.2.1 with C API prediction
       build: docker/lightgbm-v3.2.1/linux_cpu_mpi_build.dockerfile
 
+    - framework: lightgbm_c_api # v3.2.1 with C API prediction
+      build: docker/lightgbm-custom/v321_patch_cpu_mpi_build.dockerfile
+
     - framework: treelite_python # v1.3.0
+      num_threads: 1
       batch_size: 0 # use whole file as batch
 
+
8 changes: 8 additions & 0 deletions pipelines/azureml/pipelines/lightgbm_inferencing.py
@@ -130,6 +130,8 @@ def lightgbm_inferencing_pipeline_function(benchmark_custom_properties, data, mo
         inferencing_step = treelite_score_module(
             data = data,
             compiled_model = treelite_compile_step.outputs.compiled_model,
+            num_threads = variant.num_threads,
+            batch_size = variant.batch_size,
             verbose = False,
             custom_properties = custom_properties
         )
@@ -140,6 +142,8 @@ def lightgbm_inferencing_pipeline_function(benchmark_custom_properties, data, mo
         inferencing_step = lightgbm_c_api_score_module(
             data = data,
             model = model,
+            num_threads = variant.num_threads,
+            # batch_size = variant.batch_size, # not supported yet
             predict_disable_shape_check = predict_disable_shape_check,
             verbose = False,
             custom_properties = custom_properties
@@ -151,6 +155,8 @@ def lightgbm_inferencing_pipeline_function(benchmark_custom_properties, data, mo
         inferencing_step = lightgbm_cli_score_module(
             data = data,
             model = model,
+            num_threads = variant.num_threads,
+            # batch_size = variant.batch_size, # not supported yet
             predict_disable_shape_check = predict_disable_shape_check,
             verbose = False,
             custom_properties = custom_properties
@@ -162,6 +168,8 @@ def lightgbm_inferencing_pipeline_function(benchmark_custom_properties, data, mo
         inferencing_step = lightgbm_python_score_module(
             data = data,
             model = model,
+            num_threads = variant.num_threads,
+            # batch_size = variant.batch_size, # not supported yet
             predict_disable_shape_check = predict_disable_shape_check,
             verbose = False,
             custom_properties = custom_properties
40 changes: 40 additions & 0 deletions src/common/io.py
@@ -8,6 +8,7 @@
 import os
 import argparse
 import logging
+import csv
 
 def input_file_path(path):
     """ Argparse type to resolve input path as single file from directory.
@@ -225,3 +226,42 @@ def run(self, input_path, output_path):
             self.split_by_append(input_files, output_path, self.number)
         else:
             raise NotImplementedError(f"Mode {self.mode} not implemented.")


+class CustomLightGBMDataBatchIterator():
+    def __init__(self, file_path, batch_size=0, file_format="csv", **kwargs):
+        self.file_path = file_path
+        self.batch_size = batch_size
+        self.file_format = file_format
+        self.reader_options = kwargs
+
+    def iter(self):
+        if self.file_format == "csv":
+            with open(self.file_path, "r") as i_file:
+                reader = csv.reader(i_file, **self.reader_options)
+
+                batch = []
+                if self.batch_size == 0:
+                    # use the entire file as a batch
+                    batch = [
+                        [
+                            float(col) for col in row # convert all values to float for lightgbm
+                        ] for row in reader
+                    ]
+                elif self.batch_size >= 1:
+                    # create batches
+                    for row in reader:
+                        batch.append(
+                            [ float(col) for col in row ] # convert all values to float for lightgbm
+                        )
+                        if len(batch) >= self.batch_size:
+                            yield batch
+                            batch = [] # reset batch
+                else:
+                    raise ValueError("batch_size must be >= 0")
+
+                # any remaining batch, or whole file
+                if len(batch) > 0:
+                    yield batch
+        else:
+            raise NotImplementedError(f"file_format={self.file_format} is not implemented yet.")
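
For orientation, a minimal sketch of how this iterator is meant to be consumed (file name and batch size are illustrative; the empty-batch guard mirrors the one used in the treelite scoring script below):

    from common.io import CustomLightGBMDataBatchIterator

    # hypothetical headerless CSV of numeric features
    iterator = CustomLightGBMDataBatchIterator("inferencing_data.csv", batch_size=1000)
    for batch in iterator.iter():
        if len(batch) == 0:
            break  # nothing left to score
        # each batch is a list of rows, each row a list of floats
        print(f"scoring a batch of {len(batch)} rows")
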
5 changes: 5 additions & 0 deletions src/common/tasks.py
@@ -24,10 +24,15 @@ class inferencing_task:

 @dataclass
 class inferencing_variants:
+    # framework
     framework: str = MISSING
     build: Optional[str] = None
     os: str = "Linux" # linux or windows, linux by default
+
+    # parameters
+    batch_size: int = 0 # use whole file as batch
+    num_threads: int = 1 # use only one thread
 
 @dataclass
 class data_generation_task:
     task: str = MISSING
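
The MISSING sentinel suggests these dataclasses serve as OmegaConf structured configs, so a YAML variant entry that omits batch_size or num_threads should pick up the defaults above. A minimal sketch under that assumption:

    from omegaconf import OmegaConf

    # build a schema from the dataclass, then overlay a user-provided variant entry
    schema = OmegaConf.structured(inferencing_variants)
    variant = OmegaConf.merge(schema, OmegaConf.create({"framework": "treelite_python"}))
    assert variant.batch_size == 0   # default: whole file as one batch
    assert variant.num_threads == 1  # default: single thread
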
4 changes: 2 additions & 2 deletions src/scripts/inferencing/lightgbm_c_api/spec.yaml
@@ -18,7 +18,7 @@ inputs:
     type: Boolean
     description: "control whether or not LightGBM raises an error when you try to predict on data with a different number of features than the training data"
     default: False
-  n_threads:
+  num_threads:
     type: Integer
     default: 1
   verbose:
@@ -37,7 +37,7 @@ command: >-
   python score.py
   --data {inputs.data}
   --model {inputs.model}
-  --num_threads {inputs.n_threads}
+  --num_threads {inputs.num_threads}
   --output {outputs.predictions}
   --predict_disable_shape_check {inputs.predict_disable_shape_check}
   --verbose {inputs.verbose}
6 changes: 3 additions & 3 deletions src/scripts/inferencing/lightgbm_cli/spec.yaml
@@ -18,9 +18,9 @@ inputs:
     type: Boolean
     description: "control whether or not LightGBM raises an error when you try to predict on data with a different number of features than the training data"
     optional: true
-  n_threads:
+  num_threads:
     type: Integer
-    optional: true
+    default: 1
   lightgbm_exec_path:
     type: String
     optional: true
@@ -37,7 +37,7 @@ command: >-
   python score.py
   --data {inputs.data}
   --model {inputs.model}
-  [--num_threads {inputs.n_threads}]
+  --num_threads {inputs.num_threads}
   [--lightgbm_exec_path {inputs.lightgbm_exec_path}]
   [--predict_disable_shape_check {inputs.predict_disable_shape_check}]
   [--verbose {inputs.verbose}]
6 changes: 3 additions & 3 deletions src/scripts/inferencing/lightgbm_python/spec.yaml
@@ -18,9 +18,9 @@ inputs:
     type: Boolean
     description: "control whether or not LightGBM raises an error when you try to predict on data with a different number of features than the training data"
     default: False
-  n_threads:
+  num_threads:
     type: Integer
-    optional: true
+    default: 1
   verbose:
     type: Boolean
     default: False
@@ -38,7 +38,7 @@ command: >-
   --data {inputs.data}
   --model {inputs.model}
   --output {outputs.predictions}
-  [--num_threads {inputs.n_threads}]
+  --num_threads {inputs.num_threads}
   --predict_disable_shape_check {inputs.predict_disable_shape_check}
   --verbose {inputs.verbose}
   [--custom_properties {inputs.custom_properties}]
1 change: 1 addition & 0 deletions src/scripts/inferencing/treelite_python/conda_env.yaml
@@ -12,3 +12,4 @@ dependencies:
   - treelite_runtime==2.1.0
   - pandas>=1.1,<1.2
   - numpy>=1.10,<1.20
+  - matplotlib==3.4.3
79 changes: 58 additions & 21 deletions src/scripts/inferencing/treelite_python/score.py
@@ -8,30 +8,30 @@
 import sys
 import argparse
 import logging
-import numpy
 import time
+import numpy as np
 from distutils.util import strtobool
 import pandas as pd
 import treelite, treelite_runtime
 
 # Add the right path to PYTHONPATH
 # so that you can import from common.*
 COMMON_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", ".."))
 
 if COMMON_ROOT not in sys.path:
-    print(f"Adding {COMMON_ROOT} to PYTHONPATH")
+    logging.info(f"Adding {COMMON_ROOT} to PYTHONPATH")
     sys.path.append(str(COMMON_ROOT))
 
 # useful imports from common
 from common.components import RunnableScript
-from common.io import input_file_path
+from common.io import input_file_path, CustomLightGBMDataBatchIterator
 
 
 class TreeLightInferencingScript(RunnableScript):
     def __init__(self):
         super().__init__(
-            task = 'score',
+            task = "score",
             framework = 'treelite_python',
-            framework_version = treelite.__version__
+            framework_version = "PYTHON_API."+str(treelite.__version__)
         )
 
     @classmethod
@@ -61,6 +61,8 @@ def get_arg_parser(cls, parser=None):
         group_params = parser.add_argument_group("Scoring parameters")
         group_params.add_argument("--num_threads",
             required=False, default=1, type=int, help="number of threads")
+        group_params.add_argument("--batch_size",
+            required=False, default=0, type=int, help="size of batches for predict call")
 
         return parser
 
@@ -76,31 +78,66 @@ def run(self, args, logger, metrics_logger, unknown_args):
"""
# record relevant parameters
metrics_logger.log_parameters(
num_threads=args.num_threads
num_threads=args.num_threads,
batch_size=args.batch_size,
)

# make sure the output argument exists
if args.output:
# make sure the output argument exists
os.makedirs(args.output, exist_ok=True)

# and create your own file inside the output
args.output = os.path.join(args.output, "predictions.txt")

logger.info(f"Loading model from {args.so_path}")
predictor = treelite_runtime.Predictor(
args.so_path,
verbose=True,
nthread=args.num_threads
)

logger.info(f"Loading data for inferencing")
with metrics_logger.log_time_block("time_data_loading"):
my_data = pd.read_csv(args.data).to_numpy()

predictor = treelite_runtime.Predictor(
args.so_path,
verbose=True,
nthread=args.num_threads
)
dmat = treelite_runtime.DMatrix(my_data)
# accumulate predictions and latencies
predictions = []
time_inferencing_per_batch = []
batch_lengths = []

# loop through batches
for batch in CustomLightGBMDataBatchIterator(args.data, batch_size=args.batch_size, file_format="csv").iter():
if len(batch) == 0:
break
batch_lengths.append(len(batch))

# transform into dense matrix for treelite
batch_data = np.array(batch)
batch_dmat = treelite_runtime.DMatrix(batch_data)

# run prediction on batch
batch_start_time = time.monotonic()
predictions.extend(predictor.predict(batch_dmat))
time_inferencing_per_batch.append((time.monotonic() - batch_start_time)) # usecs

# log overall time
metrics_logger.log_metric("time_inferencing", sum(time_inferencing_per_batch))

# use helper to log latency with the right metric names
metrics_logger.log_inferencing_latencies(
time_inferencing_per_batch,
batch_length=batch_lengths,
factor_to_usecs=1000000.0 # values are in seconds
)

logger.info(f"Running .predict()")
with metrics_logger.log_time_block("time_inferencing"):
predictor.predict(dmat)
if args.output:
np.savetxt(
args.output,
predictions,
fmt='%f',
delimiter=',',
newline='\n',
header='',
footer='',
comments='# ',
encoding=None
)


def get_arg_parser(parser=None):
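
Stripped of the benchmark harness, the per-batch prediction pattern above reduces to this sketch (model path and data shape are illustrative, assuming treelite_runtime 2.1.0 as pinned in the conda environment):

    import time
    import numpy as np
    import treelite_runtime

    predictor = treelite_runtime.Predictor("./model.so", nthread=1)  # hypothetical compiled model
    batch = np.random.rand(1000, 40)  # 1000 rows, 40 features

    start = time.monotonic()
    predictions = predictor.predict(treelite_runtime.DMatrix(batch))
    elapsed_usecs = (time.monotonic() - start) * 1000000.0  # seconds -> microseconds
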
10 changes: 7 additions & 3 deletions src/scripts/inferencing/treelite_python/spec.yaml
@@ -15,9 +15,12 @@ inputs:
     type: AnyDirectory
     description: directory to the model
     optional: false
-  n_threads:
+  num_threads:
     type: Integer
-    optional: true
+    default: 1
+  batch_size:
+    type: Integer
+    default: 0 # default: use whole file as a batch
   verbose:
     type: Boolean
     default: False
@@ -31,7 +34,8 @@ command: >-
   python score.py
   --data {inputs.data}
   --so_path {inputs.compiled_model}
-  [--num_threads {inputs.n_threads}]
+  --num_threads {inputs.num_threads}
+  --batch_size {inputs.batch_size}
   --verbose {inputs.verbose}
   [--custom_properties {inputs.custom_properties}]
 