66 changes: 66 additions & 0 deletions src/common/metrics.py
@@ -165,6 +165,72 @@ def log_time_block(self, metric_name):
        # see class below with proper __enter__ and __exit__
        return LogTimeBlock(metric_name)

    def log_inferencing_latencies(self, time_per_batch, batch_length=1, factor_to_usecs=1000000.0):
        """Logs prediction latencies (for inferencing) as percentile metrics and histogram plots.

        Args:
            time_per_batch (List[float]): time per inferencing batch
            batch_length (Union[List[int],int]): length of each batch (a list per batch, or a constant length for all batches)
            factor_to_usecs (float): factor to apply to time_per_batch to convert values to microseconds
        """
        if isinstance(batch_length, list):
            sum_batch_lengths = sum(batch_length)
        else:
            sum_batch_lengths = batch_length * len(time_per_batch)

        # log metadata
        self.log_metric("prediction_batches", len(time_per_batch))
        self.log_metric("prediction_queries", sum_batch_lengths)

        if len(time_per_batch) > 0:
            self.log_metric("prediction_latency_avg", (sum(time_per_batch) * factor_to_usecs) / sum_batch_lengths)  # usecs

        # if there's more than 1 batch, compute percentiles
        if len(time_per_batch) > 1:
            import numpy as np
            import matplotlib.pyplot as plt

            # latency per batch
            batch_run_times = np.array(time_per_batch) * factor_to_usecs
            self.log_metric("batch_latency_p50_usecs", np.percentile(batch_run_times, 50))
            self.log_metric("batch_latency_p75_usecs", np.percentile(batch_run_times, 75))
            self.log_metric("batch_latency_p90_usecs", np.percentile(batch_run_times, 90))
            self.log_metric("batch_latency_p95_usecs", np.percentile(batch_run_times, 95))
            self.log_metric("batch_latency_p99_usecs", np.percentile(batch_run_times, 99))

            # show the distribution of batch latencies
            fig, ax = plt.subplots(1)
            ax.hist(batch_run_times, bins=100)
            ax.set_title("Latency-per-batch histogram (log scale)")
            plt.xlabel("usecs")
            plt.ylabel("occurrence")
            plt.yscale('log')

            # record in mlflow
            self.log_figure(fig, "batch_latency_log_histogram.png")

            # latency per query
            if isinstance(batch_length, list):
                prediction_latencies = np.array(time_per_batch) * factor_to_usecs / np.array(batch_length)
            else:
                prediction_latencies = np.array(time_per_batch) * factor_to_usecs / batch_length

            self.log_metric("prediction_latency_p50_usecs", np.percentile(prediction_latencies, 50))
            self.log_metric("prediction_latency_p75_usecs", np.percentile(prediction_latencies, 75))
            self.log_metric("prediction_latency_p90_usecs", np.percentile(prediction_latencies, 90))
            self.log_metric("prediction_latency_p95_usecs", np.percentile(prediction_latencies, 95))
            self.log_metric("prediction_latency_p99_usecs", np.percentile(prediction_latencies, 99))

            # show the distribution of prediction latencies
            fig, ax = plt.subplots(1)
            ax.hist(prediction_latencies, bins=100)
            ax.set_title("Latency-per-prediction histogram (log scale)")
            plt.xlabel("usecs")
            plt.ylabel("occurrence")
            plt.yscale('log')

            # record in mlflow
            self.log_figure(fig, "prediction_latency_log_histogram.png")


########################
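For reference, a minimal sketch of how the new helper could be called from a scoring script. The `batches` list and `predict` function below are hypothetical stand-ins, not code from this PR; the `common.metrics` import path assumes `src/` is on `PYTHONPATH`, as the scripts in this repo set up.

```python
import time

from common.metrics import MetricsLogger  # assumes src/ is on PYTHONPATH

metrics_logger = MetricsLogger()

# hypothetical stand-ins for a real data loader and model
batches = [[1, 2, 3], [4, 5], [6]]
predict = lambda batch: [0.0 for _ in batch]

time_per_batch = []
for batch in batches:
    start = time.monotonic()
    predict(batch)
    time_per_batch.append(time.monotonic() - start)

# logs batch/query counts, average latency, p50-p99 percentiles, and two histograms
metrics_logger.log_inferencing_latencies(
    time_per_batch,
    batch_length=[len(batch) for batch in batches],
    factor_to_usecs=1000000.0,  # times measured above are in seconds
)
```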
25 changes: 6 additions & 19 deletions src/scripts/inferencing/lightgbm_c_api/score.py
@@ -210,25 +210,12 @@ def run(self, args, logger, metrics_logger, unknown_args):
            else:
                logger.warning(f"log metric {line} does not match expected pattern {row_pattern}")

        if len(time_inferencing_per_query) > 1:
            batch_run_times = np.array(time_inferencing_per_query)
            metrics_logger.log_metric("batch_time_inferencing_p50_usecs", np.percentile(batch_run_times, 50))
            metrics_logger.log_metric("batch_time_inferencing_p75_usecs", np.percentile(batch_run_times, 75))
            metrics_logger.log_metric("batch_time_inferencing_p90_usecs", np.percentile(batch_run_times, 90))
            metrics_logger.log_metric("batch_time_inferencing_p95_usecs", np.percentile(batch_run_times, 95))
            metrics_logger.log_metric("batch_time_inferencing_p99_usecs", np.percentile(batch_run_times, 99))

            # show the distribution of prediction latencies
            fig, ax = plt.subplots(1)
            ax.hist(batch_run_times, bins=100)
            ax.set_title("Latency-per-query histogram (log scale)")
            plt.xlabel("usecs")
            plt.ylabel("occurrence")
            plt.yscale('log')

            # record in mlflow
            metrics_logger.log_figure(fig, "latency_log_histogram.png")

        # use helper to log latency with the right metric names
        metrics_logger.log_inferencing_latencies(
            time_inferencing_per_query,
            batch_length=1,  # in this exec, each row is just 1 prediction call
            factor_to_usecs=1.0  # values are already in usecs
        )

        if args.output:
            np.savetxt(
29 changes: 19 additions & 10 deletions src/scripts/inferencing/lightgbm_python/score.py
@@ -8,9 +8,10 @@
import sys
import argparse
import logging
import time
import numpy as np
from distutils.util import strtobool
import lightgbm
import numpy

# Add the right path to PYTHONPATH
# so that you can import from common.*
@@ -104,15 +105,24 @@ def run(self, args, logger, metrics_logger, unknown_args):
        )

        logger.info(f"Running .predict()")
        with metrics_logger.log_time_block("time_inferencing"):
            predictions_array = booster.predict(
                data=inference_raw_data,
                num_threads=args.num_threads,
                predict_disable_shape_check=bool(args.predict_disable_shape_check)
            )

        batch_start_time = time.monotonic()
        predictions_array = booster.predict(
            data=inference_raw_data,
            num_threads=args.num_threads,
            predict_disable_shape_check=bool(args.predict_disable_shape_check)
        )
        prediction_time = (time.monotonic() - batch_start_time)
        metrics_logger.log_metric("time_inferencing", prediction_time)

        # use helper to log latency with the right metric names
        metrics_logger.log_inferencing_latencies(
            [prediction_time],  # only one big batch
            batch_length=inference_data.num_data(),
            factor_to_usecs=1000000.0  # values are in seconds
        )

        if args.output:
            numpy.savetxt(
            np.savetxt(
                args.output,
                predictions_array,
                fmt='%f',
@@ -135,4 +145,3 @@ def main(cli_args=None):

if __name__ == "__main__":
    main()
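A note on the design choice in this file: replacing the `log_time_block` context manager with explicit `time.monotonic()` calls makes the elapsed time available as a plain variable, so it can be both logged as `time_inferencing` and passed to the new helper. A minimal, self-contained illustration of the pattern (`time.sleep` stands in for `booster.predict`):

```python
import time

start = time.monotonic()  # monotonic clock: unaffected by system clock adjustments
time.sleep(0.05)          # stand-in for booster.predict(...)
elapsed = time.monotonic() - start

# `elapsed` is now a plain float, usable both for direct metric logging
# and as input to log_inferencing_latencies([elapsed], ...)
print(f"elapsed: {elapsed:.3f}s")
```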

45 changes: 45 additions & 0 deletions tests/common/test_metrics.py
@@ -208,3 +208,48 @@ def test_metrics_logger_log_time_block(mlflow_log_metric_mock):
        time.sleep(0.01)

    mlflow_log_metric_mock.assert_called_once()


@patch('mlflow.log_figure')
@patch('mlflow.log_metric')
def test_log_inferencing_latencies(mlflow_log_metric_mock, mlflow_log_figure_mock):
    """Tests MetricsLogger().log_inferencing_latencies()"""
    metrics_logger = MetricsLogger()
    metrics_logger.close()

    test_latencies = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 5.0]
    test_batch_sizes = [1, 1, 1, 1, 1, 1, 1, 5]

    metrics_logger.log_inferencing_latencies(test_latencies, batch_length=test_batch_sizes, factor_to_usecs=1000000.0)

    mlflow_log_metric_mock.assert_has_calls(
        [
            call("prediction_batches", 8, step=None),  # len(test_latencies)
            call("prediction_queries", 12, step=None),  # sum(test_batch_sizes)

            # reference values based on test_latencies above
            call('prediction_latency_avg', 650000.0, step=None),
            call('batch_latency_p50_usecs', 450000.0, step=None),
            call('batch_latency_p75_usecs', 625000.0, step=None),
            call('batch_latency_p90_usecs', 1989999.9999999993, step=None),
            call('batch_latency_p95_usecs', 3494999.9999999977, step=None),
            call('batch_latency_p99_usecs', 4698999.999999999, step=None),
            call('prediction_latency_p50_usecs', 450000.0, step=None),
            call('prediction_latency_p75_usecs', 625000.0, step=None),
            call('prediction_latency_p90_usecs', 790000.0, step=None),
            call('prediction_latency_p95_usecs', 894999.9999999999, step=None),
            call('prediction_latency_p99_usecs', 978999.9999999999, step=None),
        ],
        any_order=True
    )

    # test the logged figures
    figure_calls = mlflow_log_figure_mock.call_args_list
    assert mlflow_log_figure_mock.call_count == 2
    assert len(figure_calls) == 2

    # second argument of each call is the file name
    assert figure_calls[0].args[1] == "batch_latency_log_histogram.png"
    assert figure_calls[1].args[1] == "prediction_latency_log_histogram.png"
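As a sanity check, the percentile reference values in the calls above can be reproduced with a few lines of NumPy (they match up to floating-point rounding, since `np.percentile` defaults to linear interpolation). This is a verification sketch, not part of the PR:

```python
import numpy as np

latencies = np.array([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 5.0])
batch_sizes = np.array([1, 1, 1, 1, 1, 1, 1, 5])

# batch-level latencies in usecs -> p50/p75/p90/p95/p99
print(np.percentile(latencies * 1e6, [50, 75, 90, 95, 99]))
# ~ [450000, 625000, 1990000, 3495000, 4699000]

# per-query latencies divide each batch time by its batch size
per_query = latencies * 1e6 / batch_sizes
print(np.percentile(per_query, [50, 75, 90, 95, 99]))
# ~ [450000, 625000, 790000, 895000, 979000]

# average latency per query
print(latencies.sum() * 1e6 / batch_sizes.sum())  # 650000.0
```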