From 0207ac2db38226845d3e23ba5768b47d435b4e40 Mon Sep 17 00:00:00 2001
From: Jeff Omhover
Date: Mon, 29 Nov 2021 09:17:23 -0800
Subject: [PATCH 1/3] unify latency metrics inside a helper function

---
 src/common/metrics.py                    | 66 +++++++++++++++++++
 .../inferencing/lightgbm_c_api/score.py  | 25 ++-----
 .../inferencing/lightgbm_python/score.py | 29 +++++---
 3 files changed, 91 insertions(+), 29 deletions(-)

diff --git a/src/common/metrics.py b/src/common/metrics.py
index f0993945..b146b469 100644
--- a/src/common/metrics.py
+++ b/src/common/metrics.py
@@ -165,6 +165,72 @@ def log_time_block(self, metric_name):
         # see class below with proper __enter__ and __exit__
         return LogTimeBlock(metric_name)
 
+    def log_inferencing_latencies(self, time_per_batch, batch_length=1, factor_to_usecs=1000000.0):
+        """Logs prediction latencies (for inferencing) as metrics, percentiles and histogram plots.
+
+        Args:
+            time_per_batch (List[float]): time spent on each inferencing batch
+            batch_length (Union[List[int], int]): length of each batch (list, or constant applied to all batches)
+            factor_to_usecs (float): factor to apply to time_per_batch to convert it to microseconds
+        """
+        if isinstance(batch_length, list):
+            sum_batch_lengths = sum(batch_length)
+        else:
+            sum_batch_lengths = batch_length * len(time_per_batch)
+
+        # log metadata
+        self.log_metric("prediction_batches", len(time_per_batch))
+        self.log_metric("prediction_queries", sum_batch_lengths)
+
+        if len(time_per_batch) > 0:
+            self.log_metric("prediction_latency_avg", (sum(time_per_batch) * factor_to_usecs) / sum_batch_lengths)  # usecs
+
+        # if there's more than 1 batch, compute percentiles
+        if len(time_per_batch) > 1:
+            import numpy as np
+            import matplotlib.pyplot as plt
+
+            # latency per batch
+            batch_run_times = np.array(time_per_batch) * factor_to_usecs
+            self.log_metric("batch_latency_p50_usecs", np.percentile(batch_run_times, 50))
+            self.log_metric("batch_latency_p75_usecs", np.percentile(batch_run_times, 75))
+            self.log_metric("batch_latency_p90_usecs", np.percentile(batch_run_times, 90))
+            self.log_metric("batch_latency_p95_usecs", np.percentile(batch_run_times, 95))
+            self.log_metric("batch_latency_p99_usecs", np.percentile(batch_run_times, 99))
+
+            # show the distribution of batch latencies
+            fig, ax = plt.subplots(1)
+            ax.hist(batch_run_times, bins=100)
+            ax.set_title("Latency-per-batch histogram (log scale)")
+            plt.xlabel("usecs")
+            plt.ylabel("occurrence")
+            plt.yscale('log')
+
+            # record in mlflow
+            self.log_figure(fig, "batch_latency_log_histogram.png")
+
+            # latency per query
+            if isinstance(batch_length, list):
+                prediction_latencies = np.array(time_per_batch) * factor_to_usecs / np.array(batch_length)
+            else:
+                prediction_latencies = np.array(time_per_batch) * factor_to_usecs / batch_length
+
+            self.log_metric("prediction_latency_p50_usecs", np.percentile(prediction_latencies, 50))
+            self.log_metric("prediction_latency_p75_usecs", np.percentile(prediction_latencies, 75))
+            self.log_metric("prediction_latency_p90_usecs", np.percentile(prediction_latencies, 90))
+            self.log_metric("prediction_latency_p95_usecs", np.percentile(prediction_latencies, 95))
+            self.log_metric("prediction_latency_p99_usecs", np.percentile(prediction_latencies, 99))
+
+            # show the distribution of prediction latencies
+            fig, ax = plt.subplots(1)
+            ax.hist(prediction_latencies, bins=100)
+            ax.set_title("Latency-per-prediction histogram (log scale)")
+            plt.xlabel("usecs")
+            plt.ylabel("occurrence")
+            plt.yscale('log')
+
+            # record in mlflow
+            self.log_figure(fig, "prediction_latency_log_histogram.png")
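Note on usage: the sketch below is hypothetical caller code, not part of this patch series; it assumes MetricsLogger is importable from common.metrics (the module patched above) and uses placeholder names (batches, model) for whatever produces the predictions. It shows the intended calling pattern when latencies are collected per batch of varying size:

    import time
    from common.metrics import MetricsLogger  # assumed import path, matching src/common/metrics.py

    metrics_logger = MetricsLogger()
    time_per_batch = []   # seconds spent on each batch
    batch_lengths = []    # number of queries in each batch

    for batch in batches:  # 'batches' and 'model' are placeholders for this sketch
        start = time.monotonic()
        model.predict(batch)
        time_per_batch.append(time.monotonic() - start)
        batch_lengths.append(len(batch))

    # seconds -> microseconds; passing per-batch sizes makes the per-query percentiles meaningful
    metrics_logger.log_inferencing_latencies(
        time_per_batch,
        batch_length=batch_lengths,
        factor_to_usecs=1000000.0,
    )

The two score.py changes below are special cases of this pattern: the C API script already has per-row timings in microseconds (batch_length=1, factor_to_usecs=1.0), while the Python script times a single large batch measured in seconds.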
diff --git a/src/scripts/inferencing/lightgbm_c_api/score.py b/src/scripts/inferencing/lightgbm_c_api/score.py
index 1353daa6..32a6ecb7 100644
--- a/src/scripts/inferencing/lightgbm_c_api/score.py
+++ b/src/scripts/inferencing/lightgbm_c_api/score.py
@@ -210,25 +210,12 @@ def run(self, args, logger, metrics_logger, unknown_args):
             else:
                 logger.warning(f"log metric {line} does not match expected pattern {row_pattern}")
-
-        if len(time_inferencing_per_query) > 1:
-            batch_run_times = np.array(time_inferencing_per_query)
-            metrics_logger.log_metric("batch_time_inferencing_p50_usecs", np.percentile(batch_run_times, 50))
-            metrics_logger.log_metric("batch_time_inferencing_p75_usecs", np.percentile(batch_run_times, 75))
-            metrics_logger.log_metric("batch_time_inferencing_p90_usecs", np.percentile(batch_run_times, 90))
-            metrics_logger.log_metric("batch_time_inferencing_p95_usecs", np.percentile(batch_run_times, 95))
-            metrics_logger.log_metric("batch_time_inferencing_p99_usecs", np.percentile(batch_run_times, 99))
-
-            # show the distribution prediction latencies
-            fig, ax = plt.subplots(1)
-            ax.hist(batch_run_times, bins=100)
-            ax.set_title("Latency-per-query histogram (log scale)")
-            plt.xlabel("usecs")
-            plt.ylabel("occurence")
-            plt.yscale('log')
-
-            # record in mlflow
-            metrics_logger.log_figure(fig, "latency_log_histogram.png")
+        # use helper to log latency with the right metric names
+        metrics_logger.log_inferencing_latencies(
+            time_inferencing_per_query,
+            batch_length=1, # in this exec, each row is just 1 prediction call
+            factor_to_usecs=1.0 # values are already in usecs
+        )
 
         if args.output:
             np.savetxt(
diff --git a/src/scripts/inferencing/lightgbm_python/score.py b/src/scripts/inferencing/lightgbm_python/score.py
index bc69dbab..2f1d20b7 100644
--- a/src/scripts/inferencing/lightgbm_python/score.py
+++ b/src/scripts/inferencing/lightgbm_python/score.py
@@ -8,9 +8,10 @@
 import sys
 import argparse
 import logging
+import time
+import numpy as np
 from distutils.util import strtobool
 import lightgbm
-import numpy
 
 # Add the right path to PYTHONPATH
 # so that you can import from common.*
@@ -104,15 +105,24 @@ def run(self, args, logger, metrics_logger, unknown_args):
         )
 
         logger.info(f"Running .predict()")
-        with metrics_logger.log_time_block("time_inferencing"):
-            predictions_array = booster.predict(
-                data=inference_raw_data,
-                num_threads=args.num_threads,
-                predict_disable_shape_check=bool(args.predict_disable_shape_check)
-            )
-
+        batch_start_time = time.monotonic()
+        predictions_array = booster.predict(
+            data=inference_raw_data,
+            num_threads=args.num_threads,
+            predict_disable_shape_check=bool(args.predict_disable_shape_check)
+        )
+        prediction_time = (time.monotonic() - batch_start_time)
+        metrics_logger.log_metric("time_inferencing", prediction_time)
+
+        # use helper to log latency with the right metric names
+        metrics_logger.log_inferencing_latencies(
+            [prediction_time], # only one big batch
+            batch_length=inference_data.num_data(),
+            factor_to_usecs=1000000.0 # values are in seconds
+        )
+
         if args.output:
-            numpy.savetxt(
+            np.savetxt(
                 args.output,
                 predictions_array,
                 fmt='%f',
@@ -135,4 +145,3 @@ def main(cli_args=None):
 
 if __name__ == "__main__":
     main()
-

From f988aa3e36e90af79e5624ade26fb1b6d9f7f944 Mon Sep 17 00:00:00 2001
From: Jeff Omhover
Date: Mon, 29 Nov 2021 09:37:37 -0800
Subject: [PATCH 2/3] add unit tests

---
 tests/common/test_metrics.py | 37 ++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/tests/common/test_metrics.py b/tests/common/test_metrics.py
index 7af3af66..42d09560 100644
--- a/tests/common/test_metrics.py
+++ b/tests/common/test_metrics.py
@@ -208,3 +208,40 @@ def test_metrics_logger_log_time_block(mlflow_log_metric_mock):
         time.sleep(0.01)
 
     mlflow_log_metric_mock.assert_called_once()
+
+
+@patch('mlflow.log_figure')
+@patch('mlflow.log_metric')
+def test_log_inferencing_latencies(mlflow_log_metric_mock, mlflow_log_figure_mock):
+    """ Tests MetricsLogger().log_inferencing_latencies() """
+    metrics_logger = MetricsLogger()
+    metrics_logger.close()
+
+    test_latencies = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 5.0]
+    test_batch_sizes = [1, 1, 1, 1, 1, 1, 1, 5]
+
+    metrics_logger.log_inferencing_latencies(test_latencies, batch_length=test_batch_sizes, factor_to_usecs=1000000.0)
+
+    mlflow_log_metric_mock.assert_has_calls(
+        [
+            call("prediction_batches", 8, step=None), # len(test_latencies)
+            call("prediction_queries", 12, step=None), # sum(test_batch_sizes)
+
+            # reference values based on test_latencies above
+
+            call("prediction_latency_avg", 650000.0, step=None), # sum(test_batch_sizes)
+
+            call("batch_latency_p50_usecs", 450000.0, step=None), # sum(test_batch_sizes)
+            call("batch_latency_p75_usecs", 625000.0, step=None), # sum(test_batch_sizes)
+            call("batch_latency_p90_usecs", 1989999.999999999, step=None), # sum(test_batch_sizes)
+            call("batch_latency_p95_usecs", 3494999.9999999977, step=None), # sum(test_batch_sizes)
+            call("batch_latency_p99_usecs", 4698999.999999998, step=None), # sum(test_batch_sizes)
+
+            call("prediction_latency_p50_usecs", 450000.0, step=None), # sum(test_batch_sizes)
+            call("prediction_latency_p75_usecs", 625000.0, step=None), # sum(test_batch_sizes)
+            call("prediction_latency_p90_usecs", 790000.0, step=None), # sum(test_batch_sizes)
+            call("prediction_latency_p95_usecs", 894999.9999999998, step=None), # sum(test_batch_sizes)
+            call('prediction_latency_p99_usecs', 979000.0, step=None)
+        ],
+        any_order=False
+    )

From 00cc37456ea62223eed1bff4a431ba18a7a96dbf Mon Sep 17 00:00:00 2001
From: Jeff Omhover
Date: Mon, 29 Nov 2021 09:58:28 -0800
Subject: [PATCH 3/3] align unit tests

---
 tests/common/test_metrics.py | 38 ++++++++++++++++++++++--------------
 1 file changed, 23 insertions(+), 15 deletions(-)

diff --git a/tests/common/test_metrics.py b/tests/common/test_metrics.py
index 42d09560..06997ca9 100644
--- a/tests/common/test_metrics.py
+++ b/tests/common/test_metrics.py
@@ -222,26 +222,34 @@ def test_log_inferencing_latencies(mlflow_log_metric_mock, mlflow_log_figure_moc
     metrics_logger.log_inferencing_latencies(test_latencies, batch_length=test_batch_sizes, factor_to_usecs=1000000.0)
 
+    #assert mlflow_log_metric_mock.call_args_list == []
+
     mlflow_log_metric_mock.assert_has_calls(
         [
            call("prediction_batches", 8, step=None), # len(test_latencies)
            call("prediction_queries", 12, step=None), # sum(test_batch_sizes)
 
            # reference values based on test_latencies above
-
-            call("prediction_latency_avg", 650000.0, step=None), # sum(test_batch_sizes)
-
-            call("batch_latency_p50_usecs", 450000.0, step=None), # sum(test_batch_sizes)
-            call("batch_latency_p75_usecs", 625000.0, step=None), # sum(test_batch_sizes)
-            call("batch_latency_p90_usecs", 1989999.999999999, step=None), # sum(test_batch_sizes)
-            call("batch_latency_p95_usecs", 3494999.9999999977, step=None), # sum(test_batch_sizes)
-            call("batch_latency_p99_usecs", 4698999.999999998, step=None), # sum(test_batch_sizes)
-
-            call("prediction_latency_p50_usecs", 450000.0, step=None), # sum(test_batch_sizes)
call("prediction_latency_p75_usecs", 625000.0, step=None), # sum(test_batch_sizes) - call("prediction_latency_p90_usecs", 790000.0, step=None), # sum(test_batch_sizes) - call("prediction_latency_p95_usecs", 894999.9999999998, step=None), # sum(test_batch_sizes) - call('prediction_latency_p99_usecs', 979000.0, step=None) + call('prediction_latency_avg', 650000.0, step=None), + call('batch_latency_p50_usecs', 450000.0, step=None), + call('batch_latency_p75_usecs', 625000.0, step=None), + call('batch_latency_p90_usecs', 1989999.9999999993, step=None), + call('batch_latency_p95_usecs', 3494999.9999999977, step=None), + call('batch_latency_p99_usecs', 4698999.999999999, step=None), + call('prediction_latency_p50_usecs', 450000.0, step=None), + call('prediction_latency_p75_usecs', 625000.0, step=None), + call('prediction_latency_p90_usecs', 790000.0, step=None), + call('prediction_latency_p95_usecs', 894999.9999999999, step=None), + call('prediction_latency_p99_usecs', 978999.9999999999, step=None), ], - any_order=False + any_order=True ) + + # testing logging figures + figure_calls = mlflow_log_figure_mock.call_args_list + assert mlflow_log_figure_mock.call_count == 2 + assert len(figure_calls) == 2 + + # second argument of each call is file name + assert (figure_calls[0].args[1] == "batch_latency_log_histogram.png") + assert (figure_calls[1].args[1] == "prediction_latency_log_histogram.png")