66 changes: 66 additions & 0 deletions src/common/metrics.py
@@ -165,6 +165,72 @@ def log_time_block(self, metric_name):
        # see class below with proper __enter__ and __exit__
        return LogTimeBlock(metric_name)

    def log_inferencing_latencies(self, time_per_batch, batch_length=1, factor_to_usecs=1000000.0):
        """Logs prediction latencies (for inferencing) as percentile metrics and histogram plots.

        Args:
            time_per_batch (List[float]): time per inferencing batch
            batch_length (Union[List[int],int]): length of each batch (a list per batch, or a constant length for all batches)
            factor_to_usecs (float): factor to apply to time_per_batch to convert values to microseconds
        """
        if isinstance(batch_length, list):
            sum_batch_lengths = sum(batch_length)
        else:
            sum_batch_lengths = batch_length * len(time_per_batch)

        # log metadata
        self.log_metric("prediction_batches", len(time_per_batch))
        self.log_metric("prediction_queries", sum_batch_lengths)

        if len(time_per_batch) > 0:
            self.log_metric("prediction_latency_avg", (sum(time_per_batch) * factor_to_usecs) / sum_batch_lengths)  # usecs

        # if there's more than 1 batch, compute percentiles
        if len(time_per_batch) > 1:
            import numpy as np
            import matplotlib.pyplot as plt

            # latency per batch
            batch_run_times = np.array(time_per_batch) * factor_to_usecs
            self.log_metric("batch_latency_p50_usecs", np.percentile(batch_run_times, 50))
            self.log_metric("batch_latency_p75_usecs", np.percentile(batch_run_times, 75))
            self.log_metric("batch_latency_p90_usecs", np.percentile(batch_run_times, 90))
            self.log_metric("batch_latency_p95_usecs", np.percentile(batch_run_times, 95))
            self.log_metric("batch_latency_p99_usecs", np.percentile(batch_run_times, 99))

            # show the distribution of batch latencies
            fig, ax = plt.subplots(1)
            ax.hist(batch_run_times, bins=100)
            ax.set_title("Latency-per-batch histogram (log scale)")
            plt.xlabel("usecs")
            plt.ylabel("occurrence")
            plt.yscale('log')

            # record in mlflow
            self.log_figure(fig, "batch_latency_log_histogram.png")

            # latency per query
            if isinstance(batch_length, list):
                prediction_latencies = np.array(time_per_batch) * factor_to_usecs / np.array(batch_length)
            else:
                prediction_latencies = np.array(time_per_batch) * factor_to_usecs / batch_length

            self.log_metric("prediction_latency_p50_usecs", np.percentile(prediction_latencies, 50))
            self.log_metric("prediction_latency_p75_usecs", np.percentile(prediction_latencies, 75))
            self.log_metric("prediction_latency_p90_usecs", np.percentile(prediction_latencies, 90))
            self.log_metric("prediction_latency_p95_usecs", np.percentile(prediction_latencies, 95))
            self.log_metric("prediction_latency_p99_usecs", np.percentile(prediction_latencies, 99))

            # show the distribution of prediction latencies
            fig, ax = plt.subplots(1)
            ax.hist(prediction_latencies, bins=100)
            ax.set_title("Latency-per-prediction histogram (log scale)")
            plt.xlabel("usecs")
            plt.ylabel("occurrence")
            plt.yscale('log')

            # record in mlflow
            self.log_figure(fig, "prediction_latency_log_histogram.png")


########################
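For reference, a minimal sketch of how the new helper could be called from a scoring script. The `batches` list and `predict` function below are hypothetical stand-ins, not code from this PR; the `common.metrics` import path assumes `src/` is on `PYTHONPATH`, as the scripts in this repo set up.

```python
import time

from common.metrics import MetricsLogger  # assumes src/ is on PYTHONPATH

metrics_logger = MetricsLogger()

# hypothetical stand-ins for a real data loader and model
batches = [[1, 2, 3], [4, 5], [6]]
predict = lambda batch: [0.0 for _ in batch]

time_per_batch = []
for batch in batches:
    start = time.monotonic()
    predict(batch)
    time_per_batch.append(time.monotonic() - start)

# logs batch/query counts, average latency, p50-p99 percentiles, and two histograms
metrics_logger.log_inferencing_latencies(
    time_per_batch,
    batch_length=[len(batch) for batch in batches],
    factor_to_usecs=1000000.0,  # times measured above are in seconds
)
```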
25 changes: 6 additions & 19 deletions src/scripts/inferencing/lightgbm_c_api/score.py
@@ -210,25 +210,12 @@ def run(self, args, logger, metrics_logger, unknown_args):
            else:
                logger.warning(f"log metric {line} does not match expected pattern {row_pattern}")

        if len(time_inferencing_per_query) > 1:
            batch_run_times = np.array(time_inferencing_per_query)
            metrics_logger.log_metric("batch_time_inferencing_p50_usecs", np.percentile(batch_run_times, 50))
            metrics_logger.log_metric("batch_time_inferencing_p75_usecs", np.percentile(batch_run_times, 75))
            metrics_logger.log_metric("batch_time_inferencing_p90_usecs", np.percentile(batch_run_times, 90))
            metrics_logger.log_metric("batch_time_inferencing_p95_usecs", np.percentile(batch_run_times, 95))
            metrics_logger.log_metric("batch_time_inferencing_p99_usecs", np.percentile(batch_run_times, 99))

            # show the distribution of prediction latencies
            fig, ax = plt.subplots(1)
            ax.hist(batch_run_times, bins=100)
            ax.set_title("Latency-per-query histogram (log scale)")
            plt.xlabel("usecs")
            plt.ylabel("occurrence")
            plt.yscale('log')

            # record in mlflow
            metrics_logger.log_figure(fig, "latency_log_histogram.png")

        # use helper to log latency with the right metric names
        metrics_logger.log_inferencing_latencies(
            time_inferencing_per_query,
            batch_length=1,  # in this exec, each row is just 1 prediction call
            factor_to_usecs=1.0  # values are already in usecs
        )

        if args.output:
            np.savetxt(
29 changes: 19 additions & 10 deletions src/scripts/inferencing/lightgbm_python/score.py
@@ -8,9 +8,10 @@
import sys
import argparse
import logging
import time
import numpy as np
from distutils.util import strtobool
import lightgbm
import numpy

# Add the right path to PYTHONPATH
# so that you can import from common.*
@@ -104,15 +105,24 @@ def run(self, args, logger, metrics_logger, unknown_args):
        )

        logger.info(f"Running .predict()")
        with metrics_logger.log_time_block("time_inferencing"):
            predictions_array = booster.predict(
                data=inference_raw_data,
                num_threads=args.num_threads,
                predict_disable_shape_check=bool(args.predict_disable_shape_check)
            )

        batch_start_time = time.monotonic()
        predictions_array = booster.predict(
            data=inference_raw_data,
            num_threads=args.num_threads,
            predict_disable_shape_check=bool(args.predict_disable_shape_check)
        )
        prediction_time = (time.monotonic() - batch_start_time)
        metrics_logger.log_metric("time_inferencing", prediction_time)

        # use helper to log latency with the right metric names
        metrics_logger.log_inferencing_latencies(
            [prediction_time],  # only one big batch
            batch_length=inference_data.num_data(),
            factor_to_usecs=1000000.0  # values are in seconds
        )

        if args.output:
            numpy.savetxt(
            np.savetxt(
                args.output,
                predictions_array,
                fmt='%f',
@@ -135,4 +145,3 @@ def main(cli_args=None):

if __name__ == "__main__":
    main()
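A note on the design choice in this file: replacing the `log_time_block` context manager with explicit `time.monotonic()` calls makes the elapsed time available as a plain variable, so it can be both logged as `time_inferencing` and passed to the new helper. A minimal, self-contained illustration of the pattern (`time.sleep` stands in for `booster.predict`):

```python
import time

start = time.monotonic()  # monotonic clock: unaffected by system clock adjustments
time.sleep(0.05)          # stand-in for booster.predict(...)
elapsed = time.monotonic() - start

# `elapsed` is now a plain float, usable both for direct metric logging
# and as input to log_inferencing_latencies([elapsed], ...)
print(f"elapsed: {elapsed:.3f}s")
```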

45 changes: 45 additions & 0 deletions tests/common/test_metrics.py
@@ -208,3 +208,48 @@ def test_metrics_logger_log_time_block(mlflow_log_metric_mock):
        time.sleep(0.01)

    mlflow_log_metric_mock.assert_called_once()


@patch('mlflow.log_figure')
@patch('mlflow.log_metric')
def test_log_inferencing_latencies(mlflow_log_metric_mock, mlflow_log_figure_mock):
    """Tests MetricsLogger().log_inferencing_latencies()"""
    metrics_logger = MetricsLogger()
    metrics_logger.close()

    test_latencies = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 5.0]
    test_batch_sizes = [1, 1, 1, 1, 1, 1, 1, 5]

    metrics_logger.log_inferencing_latencies(test_latencies, batch_length=test_batch_sizes, factor_to_usecs=1000000.0)

    mlflow_log_metric_mock.assert_has_calls(
        [
            call("prediction_batches", 8, step=None),  # len(test_latencies)
            call("prediction_queries", 12, step=None),  # sum(test_batch_sizes)

            # reference values based on test_latencies above
            call('prediction_latency_avg', 650000.0, step=None),
            call('batch_latency_p50_usecs', 450000.0, step=None),
            call('batch_latency_p75_usecs', 625000.0, step=None),
            call('batch_latency_p90_usecs', 1989999.9999999993, step=None),
            call('batch_latency_p95_usecs', 3494999.9999999977, step=None),
            call('batch_latency_p99_usecs', 4698999.999999999, step=None),
            call('prediction_latency_p50_usecs', 450000.0, step=None),
            call('prediction_latency_p75_usecs', 625000.0, step=None),
            call('prediction_latency_p90_usecs', 790000.0, step=None),
            call('prediction_latency_p95_usecs', 894999.9999999999, step=None),
            call('prediction_latency_p99_usecs', 978999.9999999999, step=None),
        ],
        any_order=True
    )

    # test the logged figures
    figure_calls = mlflow_log_figure_mock.call_args_list
    assert mlflow_log_figure_mock.call_count == 2
    assert len(figure_calls) == 2

    # second argument of each call is the file name
    assert figure_calls[0].args[1] == "batch_latency_log_histogram.png"
    assert figure_calls[1].args[1] == "prediction_latency_log_histogram.png"
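As a sanity check, the percentile reference values in the calls above can be reproduced with a few lines of NumPy (they match up to floating-point rounding, since `np.percentile` defaults to linear interpolation). This is a verification sketch, not part of the PR:

```python
import numpy as np

latencies = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 5.0])
batch_sizes = np.array([1, 1, 1, 1, 1, 1, 1, 5])

# batch-level latencies in usecs -> p50/p75/p90/p95/p99
print(np.percentile(latencies * 1e6, [50, 75, 90, 95, 99]))
# ~ [450000, 625000, 1990000, 3495000, 4699000]

# per-query latencies divide each batch time by its batch size
per_query = latencies * 1e6 / batch_sizes
print(np.percentile(per_query, [50, 75, 90, 95, 99]))
# ~ [450000, 625000, 790000, 895000, 979000]

# average latency per query
print(latencies.sum() * 1e6 / batch_sizes.sum())  # 650000.0
```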