From 6c4cfaccf594b8e43059fa84321aa673469beab3 Mon Sep 17 00:00:00 2001
From: Jeff Omhover
Date: Thu, 10 Feb 2022 14:11:14 -0800
Subject: [PATCH 1/6] add perf plots

---
 src/common/perf.py                            | 319 ++++++++++++------
 .../generate_data/conda_env.yaml              |   1 +
 .../lightgbm_data2bin/conda_env.yml           |   1 +
 .../partition_data/conda_env.yml              |   1 +
 .../inferencing/custom_win_cli/conda_env.yaml |   1 +
 .../treelite_python/conda_env.yaml            |   1 +
 .../treelite_compile/conda_env.yaml           |   1 +
 src/scripts/sample/conda_env.yaml             |   1 +
 8 files changed, 224 insertions(+), 102 deletions(-)

diff --git a/src/common/perf.py b/src/common/perf.py
index ea02dd06..56066ae5 100644
--- a/src/common/perf.py
+++ b/src/common/perf.py
@@ -9,6 +9,7 @@
 import threading
 import time
 import psutil
+import numpy as np
 
 
 class PerformanceReportingThread(threading.Thread):
@@ -187,6 +188,9 @@ def append_perf_metrics(self, perf_metrics):
 
 class PerfReportPlotter():
     """Once collected all perf reports from all nodes"""
+    PERF_DISK_NET_PLOT_BINS = 50
+    PERF_DISK_NET_PLOT_FIGSIZE = (16,12)
+
     def __init__(self, metrics_logger):
         self.all_reports = {}
         self.metrics_logger = metrics_logger
@@ -197,107 +201,218 @@ def add_perf_reports(self, perf_reports, node):
         self.all_reports[node] = perf_reports
 
     def report_nodes_perf(self):
+        """Report performance via metrics/plots for all nodes (loop)"""
         # Currently reporting one metric per node
         for node in self.all_reports:
-            # CPU UTILIZATION
-            cpu_avg_utilization = [ report["cpu_pct_per_cpu_avg"] for report in self.all_reports[node] ]
-
-            self.metrics_logger.log_metric(
-                "max_t_(cpu_pct_per_cpu_avg)",
-                max(cpu_avg_utilization),
-                step=node
-            )
-            self.metrics_logger.log_metric(
-                "cpu_avg_utilization_pct",
-                sum(cpu_avg_utilization)/len(cpu_avg_utilization),
-                step=node
-            )
-            self.metrics_logger.log_metric(
-                "cpu_avg_utilization_at100_pct",
-                sum( [ utilization >= 100.0 for utilization in cpu_avg_utilization])/len(cpu_avg_utilization)*100.0,
-                step=node
-            )
-            self.metrics_logger.log_metric(
-                "cpu_avg_utilization_over80_pct",
-                sum( [ utilization >= 80.0 for utilization in cpu_avg_utilization])/len(cpu_avg_utilization)*100.0,
-                step=node
-            )
-            self.metrics_logger.log_metric(
-                "cpu_avg_utilization_over40_pct",
-                sum( [ utilization >= 40.0 for utilization in cpu_avg_utilization])/len(cpu_avg_utilization)*100.0,
-                step=node
-            )
-            self.metrics_logger.log_metric(
-                "cpu_avg_utilization_over20_pct",
-                sum( [ utilization >= 20.0 for utilization in cpu_avg_utilization])/len(cpu_avg_utilization)*100.0,
-                step=node
-            )
-            self.metrics_logger.log_metric(
-                "max_t_(cpu_pct_per_cpu_min)",
-                max([ report["cpu_pct_per_cpu_min"] for report in self.all_reports[node] ]),
-                step=node
-            )
-            self.metrics_logger.log_metric(
-                "max_t_(cpu_pct_per_cpu_max)",
-                max([ report["cpu_pct_per_cpu_max"] for report in self.all_reports[node] ]),
-                step=node
-            )
-
-            # "CPU HOURS"
-            job_internal_cpu_hours = (time.time() - self.all_reports[node][0]["timestamp"]) * psutil.cpu_count() / 60 / 60
-            self.metrics_logger.log_metric(
-                "node_cpu_hours",
-                job_internal_cpu_hours,
-                step=node
-            )
-            self.metrics_logger.log_metric(
-                "node_unused_cpu_hours",
-                job_internal_cpu_hours * (100.0 - sum(cpu_avg_utilization)/len(cpu_avg_utilization)) / 100.0,
-                step=node
-            )
-
-            # MEM
-            self.metrics_logger.log_metric(
-                "max_t_(mem_percent)",
-                max([ report["mem_percent"] for report in self.all_reports[node] ]),
-                step=node
-            )
-
-            # DISK
-            self.metrics_logger.log_metric(
-                "max_t_disk_usage_percent",
-                max([ report["disk_usage_percent"] for report in self.all_reports[node] ]),
-                step=node
-            )
-            self.metrics_logger.log_metric(
-                "total_disk_io_read_mb",
-                max([ report["disk_io_read_mb"] for report in self.all_reports[node] ]),
-                step=node
-            )
-            self.metrics_logger.log_metric(
-                "total_disk_io_write_mb",
-                max([ report["disk_io_write_mb"] for report in self.all_reports[node] ]),
-                step=node
-            )
-
-            # NET I/O
-            self.metrics_logger.log_metric(
-                "total_net_io_lo_sent_mb",
-                max([ report["net_io_lo_sent_mb"] for report in self.all_reports[node] ]),
-                step=node
-            )
-            self.metrics_logger.log_metric(
-                "total_net_io_ext_sent_mb",
-                max([ report["net_io_ext_sent_mb"] for report in self.all_reports[node] ]),
-                step=node
-            )
-            self.metrics_logger.log_metric(
-                "total_net_io_lo_recv_mb",
-                max([ report["net_io_lo_recv_mb"] for report in self.all_reports[node] ]),
-                step=node
-            )
-            self.metrics_logger.log_metric(
-                "total_net_io_ext_recv_mb",
-                max([ report["net_io_ext_recv_mb"] for report in self.all_reports[node] ]),
-                step=node
-            )
+            self.report_node_perf(node)
+
+    def diff_at_partitions(self, a, p, t):
+        """Computes the difference (end minus begin) within each partition of an array.
+
+        Args:
+            a (np.array): np array of increasing values
+            p (int): a given number of partitions
+            t (np.array): np array of timestamps aligned with a
+
+        Returns:
+            partitioned_diff (np.array): per-partition differences, dimension p
+            partitioned_time (np.array): timestamp at the start of each partition, dimension p
+        """
+        if len(a) > (2*p):
+            partitioned_diff = []
+            partitioned_time = []
+            index_partitions = np.array_split(np.arange(len(a)), p)
+            print(f"i={index_partitions}")
+            for part in index_partitions:
+                partitioned_diff.append(a[part[-1]] - a[part[0]]) # diff between last and first value of partition
+                partitioned_time.append(t[part[0]])
+            return np.array(partitioned_diff), np.array(partitioned_time)
+        else:
+            return np.ediff1d(a), t[1::]
+
+    def report_node_perf(self, node):
+        """Report performance via metrics/plots for one given node"""
+        # CPU UTILIZATION
+        cpu_avg_utilization = [ report["cpu_pct_per_cpu_avg"] for report in self.all_reports[node] ]
+        self.metrics_logger.log_metric(
+            "max_t_(cpu_pct_per_cpu_avg)",
+            max(cpu_avg_utilization),
+            step=node
+        )
+        self.metrics_logger.log_metric(
+            "cpu_avg_utilization_pct",
+            sum(cpu_avg_utilization)/len(cpu_avg_utilization),
+            step=node
+        )
+        self.metrics_logger.log_metric(
+            "cpu_avg_utilization_at100_pct",
+            sum( [ utilization >= 100.0 for utilization in cpu_avg_utilization])/len(cpu_avg_utilization)*100.0,
+            step=node
+        )
+        self.metrics_logger.log_metric(
+            "cpu_avg_utilization_over80_pct",
+            sum( [ utilization >= 80.0 for utilization in cpu_avg_utilization])/len(cpu_avg_utilization)*100.0,
+            step=node
+        )
+        self.metrics_logger.log_metric(
+            "cpu_avg_utilization_over40_pct",
+            sum( [ utilization >= 40.0 for utilization in cpu_avg_utilization])/len(cpu_avg_utilization)*100.0,
+            step=node
+        )
+        self.metrics_logger.log_metric(
+            "cpu_avg_utilization_over20_pct",
+            sum( [ utilization >= 20.0 for utilization in cpu_avg_utilization])/len(cpu_avg_utilization)*100.0,
+            step=node
+        )
+        self.metrics_logger.log_metric(
+            "max_t_(cpu_pct_per_cpu_min)",
+            max([ report["cpu_pct_per_cpu_min"] for report in self.all_reports[node] ]),
+            step=node
+        )
+        self.metrics_logger.log_metric(
+            "max_t_(cpu_pct_per_cpu_max)",
+            max([ report["cpu_pct_per_cpu_max"] for report in self.all_reports[node] ]),
+            step=node
+        )
+
+        # "CPU HOURS"
+        job_internal_cpu_hours = (time.time() - self.all_reports[node][0]["timestamp"]) * psutil.cpu_count() / 60 / 60
+        self.metrics_logger.log_metric(
+            "node_cpu_hours",
+            job_internal_cpu_hours,
+            step=node
+        )
+        self.metrics_logger.log_metric(
+            "node_unused_cpu_hours",
+            job_internal_cpu_hours * (100.0 - sum(cpu_avg_utilization)/len(cpu_avg_utilization)) / 100.0,
+            step=node
+        )
+
+        # MEM
+        mem_utilization = [ report["mem_percent"] for report in self.all_reports[node] ]
+        self.metrics_logger.log_metric(
+            "max_t_(mem_percent)",
+            max(mem_utilization),
+            step=node
+        )
+
+        # DISK
+        self.metrics_logger.log_metric(
+            "max_t_disk_usage_percent",
+            max([ report["disk_usage_percent"] for report in self.all_reports[node] ]),
+            step=node
+        )
+
+        disk_io_read = np.array([ report["disk_io_read_mb"] for report in self.all_reports[node] ])
+        self.metrics_logger.log_metric(
+            "total_disk_io_read_mb",
+            np.max(disk_io_read),
+            step=node
+        )
+        disk_io_write = np.array([ report["disk_io_write_mb"] for report in self.all_reports[node] ])
+        self.metrics_logger.log_metric(
+            "total_disk_io_write_mb",
+            np.max(disk_io_write),
+            step=node
+        )
+
+        # NET I/O
+        self.metrics_logger.log_metric(
+            "total_net_io_lo_sent_mb",
+            max([ report["net_io_lo_sent_mb"] for report in self.all_reports[node] ]),
+            step=node
+        )
+        net_io_ext_sent = np.array([ report["net_io_ext_sent_mb"] for report in self.all_reports[node] ])
+        self.metrics_logger.log_metric(
+            "total_net_io_ext_sent_mb",
+            np.max(net_io_ext_sent),
+            step=node
+        )
+        self.metrics_logger.log_metric(
+            "total_net_io_lo_recv_mb",
+            max([ report["net_io_lo_recv_mb"] for report in self.all_reports[node] ]),
+            step=node
+        )
+        net_io_ext_recv = np.array([ report["net_io_ext_recv_mb"] for report in self.all_reports[node] ])
+        self.metrics_logger.log_metric(
+            "total_net_io_ext_recv_mb",
+            np.max(net_io_ext_recv),
+            step=node
+        )
+
+        # then plot everything
+        timestamps = np.array([ report["timestamp"] for report in self.all_reports[node] ])
+        self.plot_all_perf(node, timestamps, disk_io_read, disk_io_write, net_io_ext_sent, net_io_ext_recv, cpu_avg_utilization, mem_utilization)
+
+    def plot_all_perf(self, node, timestamps, disk_io_read, disk_io_write, net_io_ext_sent, net_io_ext_recv, cpu_utilization, mem_utilization):
+        """Plots mem/cpu/disk/net using matplotlib.
+
+        Args:
+            node (int): node id
+            timestamps (np.array): timestamps for all following arrays
+            disk_io_read (np.array): disk read cumulative sum (increasing)
+            disk_io_write (np.array): disk write cumulative sum (increasing)
+            net_io_ext_sent (np.array): net sent cumulative sum (increasing)
+            net_io_ext_recv (np.array): net recv cumulative sum (increasing)
+            cpu_utilization (np.array): cpu utilization
+            mem_utilization (np.array): mem utilization
+
+        Returns:
+            None
+
+        NOTE: we're expecting all arrays to have the same length.
+        """
+        timestamps = timestamps - timestamps[0]
+        disk_io_read = disk_io_read - disk_io_read[0] # cumsum starting at 0
+        disk_io_write = disk_io_write - disk_io_write[0] # cumsum starting at 0
+        net_io_ext_sent = net_io_ext_sent - net_io_ext_sent[0] # cumsum starting at 0
+        net_io_ext_recv = net_io_ext_recv - net_io_ext_recv[0] # cumsum starting at 0
+
+        # import matplotlib just-in-time
+        import matplotlib.pyplot as plt
+        #plt.switch_backend('agg')
+
+        fig, ax = plt.subplots(nrows=1, ncols=1, sharex=False, figsize=(16,8))
+
+        #ax = axes[0]
+        ax.set_xlabel('job time')
+        ax.set_ylabel('mb')
+
+        reduced_disk_io_read, reduced_timestamps = self.diff_at_partitions(disk_io_read, PerfReportPlotter.PERF_DISK_NET_PLOT_BINS, timestamps)
+        reduced_disk_io_write, _ = self.diff_at_partitions(disk_io_write, PerfReportPlotter.PERF_DISK_NET_PLOT_BINS, timestamps)
+        reduced_net_io_ext_sent, _ = self.diff_at_partitions(net_io_ext_sent, PerfReportPlotter.PERF_DISK_NET_PLOT_BINS, timestamps)
+        reduced_net_io_ext_recv, _ = self.diff_at_partitions(net_io_ext_recv, PerfReportPlotter.PERF_DISK_NET_PLOT_BINS, timestamps)
+
+        # identify a good normalization value for disk+net plots
+        max_disk_net_normalization_value = max(
+            max(reduced_disk_io_read),
+            max(reduced_disk_io_write),
+            max(reduced_net_io_ext_sent),
+            max(reduced_net_io_ext_recv),
+        ) * 1.05 # 5% more to keep some visual margin on the plot
+
+        width = max(reduced_timestamps) / len(reduced_timestamps) / 5
+        ax.bar(reduced_timestamps, reduced_disk_io_read, width=width, label="disk read", color="springgreen")
+        ax.bar(reduced_timestamps + width, reduced_disk_io_write, width=width, label="disk write", color="darkgreen")
+        ax.bar(reduced_timestamps + 2*width, reduced_net_io_ext_sent, width=width, label="net sent", color="hotpink")
+        ax.bar(reduced_timestamps + 3*width, reduced_net_io_ext_recv, width=width, label="net recv", color="purple")
+        ax.set_ylim([0.0, max_disk_net_normalization_value])
+        ax.legend(loc='upper left')
+
+        ax2 = ax.twinx()
+        #ax2 = axes[1]
+        ax2.set_ylabel('%')
+        ax2.plot(timestamps, cpu_utilization, label="cpu", color="r")
+        ax2.plot(timestamps, mem_utilization, label="mem", color="b")
+        ax2.set_ylim([0.0, 100.0])
+        ax2.legend(loc='upper right')
+        #ax.set_title(f"Disk+Net I/O normalized plot (max = {max_disk_net_normalization_value:.2f}mb")
+
+        #plt.xlabel("job time")
+        #plt.ylim(0.0, max_disk_net_normalization_value)
+        plt.legend(loc='best')
+
+        plt.show()
+
+        # record in mlflow
+        self.metrics_logger.log_figure(fig, f"disk_and_net_plot_node{node}.png")
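Reviewer note on diff_at_partitions: the perf counters above (disk_io_read_mb, net_io_ext_sent_mb, ...) are cumulative, so plotting them directly only shows a rising line; the method buckets the samples into PERF_DISK_NET_PLOT_BINS partitions and keeps the (last - first) delta per bucket, i.e. an approximate per-bin throughput. A minimal standalone sketch of that bucketing logic, with illustrative data (not part of the patch):

    import numpy as np

    def diff_at_partitions(a, p, t):
        # same logic as PerfReportPlotter.diff_at_partitions (minus the debug print):
        # split the sample indices into p buckets, keep (last - first) per bucket
        if len(a) > (2 * p):
            index_partitions = np.array_split(np.arange(len(a)), p)
            diffs = np.array([a[part[-1]] - a[part[0]] for part in index_partitions])
            times = np.array([t[part[0]] for part in index_partitions])
            return diffs, times
        # too few samples to bucket: fall back to first-order differences
        return np.ediff1d(a), t[1:]

    # illustrative data: a cumulative disk-read counter (MB) sampled once per second
    mb_read = np.array([0., 2., 4., 4., 4., 10., 18., 18., 20., 21.])
    ts = np.arange(10.)
    diffs, times = diff_at_partitions(mb_read, 3, ts)
    print(diffs)  # [ 4. 14.  3.] -> MB read within each of the 3 bins
    print(times)  # [0. 4. 7.]    -> timestamp at the start of each bin

With fewer than 2*p samples, the method degrades gracefully to plain first-order differences via np.ediff1d.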
diff --git a/src/scripts/data_processing/generate_data/conda_env.yaml b/src/scripts/data_processing/generate_data/conda_env.yaml
index 223c34f7..9480a920 100644
--- a/src/scripts/data_processing/generate_data/conda_env.yaml
+++ b/src/scripts/data_processing/generate_data/conda_env.yaml
@@ -10,3 +10,4 @@ dependencies:
   - azureml-defaults==1.35.0
   - azureml-mlflow==1.35.0
   - psutil==5.8.0
+  - matplotlib==3.4.3
diff --git a/src/scripts/data_processing/lightgbm_data2bin/conda_env.yml b/src/scripts/data_processing/lightgbm_data2bin/conda_env.yml
index 2c403f37..3245a774 100644
--- a/src/scripts/data_processing/lightgbm_data2bin/conda_env.yml
+++ b/src/scripts/data_processing/lightgbm_data2bin/conda_env.yml
@@ -11,3 +11,4 @@ dependencies:
   - azureml-mlflow==1.35.0
   - psutil==5.8.0
   - lightgbm==3.2.1
+  - matplotlib==3.4.3
diff --git a/src/scripts/data_processing/partition_data/conda_env.yml b/src/scripts/data_processing/partition_data/conda_env.yml
index 39dabefc..f15a18f7 100644
--- a/src/scripts/data_processing/partition_data/conda_env.yml
+++ b/src/scripts/data_processing/partition_data/conda_env.yml
@@ -10,4 +10,5 @@ dependencies:
   - azureml-defaults==1.35.0
   - azureml-mlflow==1.35.0
   - psutil==5.8.0
+  - matplotlib==3.4.3
diff --git a/src/scripts/inferencing/custom_win_cli/conda_env.yaml b/src/scripts/inferencing/custom_win_cli/conda_env.yaml
index 78eed94f..a15650c6 100644
--- a/src/scripts/inferencing/custom_win_cli/conda_env.yaml
+++ b/src/scripts/inferencing/custom_win_cli/conda_env.yaml
@@ -8,3 +8,4 @@ dependencies:
   - azureml-defaults==1.35.0
   - azureml-mlflow==1.35.0
   - psutil==5.8.0
+  - matplotlib==3.4.3
diff --git a/src/scripts/inferencing/treelite_python/conda_env.yaml b/src/scripts/inferencing/treelite_python/conda_env.yaml
index b31a7368..0d08ea4c 100644
--- a/src/scripts/inferencing/treelite_python/conda_env.yaml
+++ b/src/scripts/inferencing/treelite_python/conda_env.yaml
@@ -12,3 +12,4 @@ dependencies:
   - treelite_runtime==2.1.0
   - pandas>=1.1,<1.2
   - numpy>=1.10,<1.20
+  - matplotlib==3.4.3
diff --git a/src/scripts/model_transformation/treelite_compile/conda_env.yaml b/src/scripts/model_transformation/treelite_compile/conda_env.yaml
index b31a7368..0d08ea4c 100644
--- a/src/scripts/model_transformation/treelite_compile/conda_env.yaml
+++ b/src/scripts/model_transformation/treelite_compile/conda_env.yaml
@@ -12,3 +12,4 @@ dependencies:
   - treelite_runtime==2.1.0
   - pandas>=1.1,<1.2
   - numpy>=1.10,<1.20
+  - matplotlib==3.4.3
diff --git a/src/scripts/sample/conda_env.yaml b/src/scripts/sample/conda_env.yaml
index 0201788d..2b11a210 100644
--- a/src/scripts/sample/conda_env.yaml
+++ b/src/scripts/sample/conda_env.yaml
@@ -8,3 +8,4 @@ dependencies:
   - azureml-defaults==1.35.0
   - azureml-mlflow==1.35.0
   - psutil==5.8.0
+  - matplotlib==3.4.3
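Reviewer note on the plot layout in plot_all_perf: a single axes carries the MB-scaled bars, and ax.twinx() overlays a second y-axis (0-100%) for cpu/mem on the same x-axis. A minimal sketch of the dual-axis pattern with synthetic data (all names and values illustrative, not part of the patch):

    import numpy as np
    import matplotlib
    matplotlib.use('agg')  # headless backend, safe without a display
    import matplotlib.pyplot as plt

    t = np.linspace(0.0, 60.0, 50)            # job time in seconds
    io_mb = np.abs(np.sin(t / 7.0)) * 5.0     # synthetic per-bin I/O in MB
    cpu_pct = 50.0 + 30.0 * np.sin(t / 10.0)  # synthetic cpu utilization in %

    fig, ax = plt.subplots(figsize=(16, 8))
    ax.bar(t, io_mb, width=0.5, color='silver', label='io (MB)')
    ax.set_ylabel('MB')
    ax.legend(loc='upper left')

    ax2 = ax.twinx()  # second y-axis on the right, same x-axis as ax
    ax2.plot(t, cpu_pct, color='r', label='cpu')
    ax2.set_ylim([0.0, 100.0])
    ax2.set_ylabel('%')
    ax2.legend(loc='upper right')

    fig.savefig('dual_axis_sketch.png')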
From d4f44377e0b341f7c0ea75a3a70d5b9af91eb793 Mon Sep 17 00:00:00 2001
From: Jeff Omhover
Date: Thu, 10 Feb 2022 14:12:23 -0800
Subject: [PATCH 2/6] use global var figsize

---
 src/common/perf.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/common/perf.py b/src/common/perf.py
index 56066ae5..5075d61b 100644
--- a/src/common/perf.py
+++ b/src/common/perf.py
@@ -189,7 +189,7 @@ def append_perf_metrics(self, perf_metrics):
 
 class PerfReportPlotter():
     """Once collected all perf reports from all nodes"""
     PERF_DISK_NET_PLOT_BINS = 50
-    PERF_DISK_NET_PLOT_FIGSIZE = (16,12)
+    PERF_DISK_NET_PLOT_FIGSIZE = (16,8)
 
     def __init__(self, metrics_logger):
@@ -372,7 +372,7 @@ def plot_all_perf(self, node, timestamps, disk_io_read, disk_io_write, net_io_ex
         import matplotlib.pyplot as plt
         #plt.switch_backend('agg')
 
-        fig, ax = plt.subplots(nrows=1, ncols=1, sharex=False, figsize=(16,8))
+        fig, ax = plt.subplots(nrows=1, ncols=1, sharex=False, figsize=PerfReportPlotter.PERF_DISK_NET_PLOT_FIGSIZE)
 
         #ax = axes[0]
         ax.set_xlabel('job time')
From 6050f379aba5a8ab17c187957de08d1d403ad604 Mon Sep 17 00:00:00 2001
From: Jeff Omhover
Date: Fri, 11 Feb 2022 13:18:14 -0800
Subject: [PATCH 3/6] polish plot for accessibility (hopefully)

---
 src/common/perf.py | 44 ++++++++++++++++++++------------------------
 1 file changed, 20 insertions(+), 24 deletions(-)

diff --git a/src/common/perf.py b/src/common/perf.py
index 5075d61b..bfe6d96c 100644
--- a/src/common/perf.py
+++ b/src/common/perf.py
@@ -369,15 +369,15 @@ def plot_all_perf(self, node, timestamps, disk_io_read, disk_io_write, net_io_ex
 
         # import matplotlib just-in-time
         import matplotlib.pyplot as plt
-        #plt.switch_backend('agg')
+        plt.switch_backend('agg') # to enable it in thread
 
-        fig, ax = plt.subplots(nrows=1, ncols=1, sharex=False, figsize=PerfReportPlotter.PERF_DISK_NET_PLOT_FIGSIZE)
+        fig, ax = plt.subplots(
+            nrows=1,
+            ncols=1,
+            sharex=False, # single axes; cpu/mem share its x axis (job time) via twinx() below
+            figsize=PerfReportPlotter.PERF_DISK_NET_PLOT_FIGSIZE
+        )
 
-        #ax = axes[0]
-        ax.set_xlabel('job time')
-        ax.set_ylabel('mb')
-
         reduced_disk_io_read, reduced_timestamps = self.diff_at_partitions(disk_io_read, PerfReportPlotter.PERF_DISK_NET_PLOT_BINS, timestamps)
         reduced_disk_io_write, _ = self.diff_at_partitions(disk_io_write, PerfReportPlotter.PERF_DISK_NET_PLOT_BINS, timestamps)
         reduced_net_io_ext_sent, _ = self.diff_at_partitions(net_io_ext_sent, PerfReportPlotter.PERF_DISK_NET_PLOT_BINS, timestamps)
@@ -391,28 +391,24 @@ def plot_all_perf(self, node, timestamps, disk_io_read, disk_io_write, net_io_ex
             max(reduced_net_io_ext_recv),
         ) * 1.05 # 5% more to keep some visual margin on the plot
 
-        width = max(reduced_timestamps) / len(reduced_timestamps) / 5
-        ax.bar(reduced_timestamps, reduced_disk_io_read, width=width, label="disk read", color="springgreen")
-        ax.bar(reduced_timestamps + width, reduced_disk_io_write, width=width, label="disk write", color="darkgreen")
-        ax.bar(reduced_timestamps + 2*width, reduced_net_io_ext_sent, width=width, label="net sent", color="hotpink")
-        ax.bar(reduced_timestamps + 3*width, reduced_net_io_ext_recv, width=width, label="net recv", color="purple")
+        width = max(reduced_timestamps) / len(reduced_timestamps) / 5 # barplot width = 1/5 of timestamp unit
+        ax.bar(reduced_timestamps, reduced_disk_io_read, width=width, label="disk read", color="w", hatch='/', edgecolor="dimgray")
+        ax.bar(reduced_timestamps + width, reduced_disk_io_write, width=width, label="disk write", color="w", hatch='-', edgecolor="dimgray")
+        ax.bar(reduced_timestamps + 2*width, reduced_net_io_ext_sent, width=width, label="net sent", color="silver")
+        ax.bar(reduced_timestamps + 3*width, reduced_net_io_ext_recv, width=width, label="net recv", color="gray")
         ax.set_ylim([0.0, max_disk_net_normalization_value])
         ax.legend(loc='upper left')
+        ax.set_ylabel('MB')
 
-        ax2 = ax.twinx()
-        #ax2 = axes[1]
-        ax2.set_ylabel('%')
+        ax2 = ax.twinx() # superimposes on ax, but shows its own axis on the right
         ax2.plot(timestamps, cpu_utilization, label="cpu", color="r")
-        ax2.plot(timestamps, mem_utilization, label="mem", color="b")
-        ax2.set_ylim([0.0, 100.0])
+        ax2.plot(timestamps, mem_utilization, label="mem", color="b", linestyle='dotted')
+        ax2.set_ylim([0.0, max(max(cpu_utilization), max(mem_utilization), 100.0)])
         ax2.legend(loc='upper right')
+        ax2.set_ylabel('%')
-        #ax.set_title(f"Disk+Net I/O normalized plot (max = {max_disk_net_normalization_value:.2f}mb")
-
-        #plt.xlabel("job time")
-        #plt.ylim(0.0, max_disk_net_normalization_value)
-        plt.legend(loc='best')
+        ax.set_xlabel("time in seconds (job duration={:.2f}s)".format(timestamps[-1]-timestamps[0]))
+        plt.title(f"Perf plot for node {node}")
 
-        plt.show()
-
         # record in mlflow
-        self.metrics_logger.log_figure(fig, f"disk_and_net_plot_node{node}.png")
+        self.metrics_logger.log_figure(fig, f"perf_plot_node{node}.png")
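Reviewer note on the accessibility polish: the restyle replaces hue-only encoding (springgreen/darkgreen/hotpink/purple) with hatch patterns, gray levels and a dotted linestyle, which stay distinguishable in grayscale print and for color-blind readers. A minimal sketch of hatch-coded grouped bars (synthetic data, illustrative names; not part of the patch):

    import numpy as np
    import matplotlib
    matplotlib.use('agg')
    import matplotlib.pyplot as plt

    rng = np.random.default_rng(0)
    x = np.arange(5, dtype=float)
    fig, ax = plt.subplots()
    # encode the four series with hatches and gray levels, not hue alone
    ax.bar(x,       rng.random(5), width=0.2, color='w', hatch='/', edgecolor='dimgray', label='disk read')
    ax.bar(x + 0.2, rng.random(5), width=0.2, color='w', hatch='-', edgecolor='dimgray', label='disk write')
    ax.bar(x + 0.4, rng.random(5), width=0.2, color='silver', label='net sent')
    ax.bar(x + 0.6, rng.random(5), width=0.2, color='gray', label='net recv')
    ax.legend(loc='upper left')
    fig.savefig('hatched_bars_sketch.png')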
From 9b74effc9e095e38ac69abb16ae3e7cd0fcc248b Mon Sep 17 00:00:00 2001
From: Jeff Omhover
Date: Fri, 11 Feb 2022 14:23:33 -0800
Subject: [PATCH 4/6] add log_figure to component tests

---
 tests/common/test_component.py   | 6 ++++--
 tests/common/test_distributed.py | 6 ++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/tests/common/test_component.py b/tests/common/test_component.py
index 2865f9e3..6d3bb9ea 100644
--- a/tests/common/test_component.py
+++ b/tests/common/test_component.py
@@ -102,11 +102,12 @@ def run(self, args, logger, metrics_logger, unknown_args):
         time.sleep(1)
 
 @patch('mlflow.end_run')
+@patch('mlflow.log_figure')
 @patch('mlflow.log_artifact')
 @patch('mlflow.log_metric')
 @patch('mlflow.set_tags')
 @patch('mlflow.start_run')
-def test_single_node_script_metrics(mlflow_start_run_mock, mlflow_set_tags_mock, mlflow_log_metric_mock, mlflow_log_artifact_mock, mlflow_end_run_mock):
+def test_single_node_script_metrics(mlflow_start_run_mock, mlflow_set_tags_mock, mlflow_log_metric_mock, mlflow_log_artifact_mock, mlflow_log_figure_mock, mlflow_end_run_mock):
     # just run main
     test_component = FakeSingleNodeScript.main(
         [
@@ -132,7 +133,8 @@ def test_single_node_script_metrics(mlflow_start_run_mock, mlflow_set_tags_mock,
         mlflow_log_metric_mock
     )
 
-    mlflow_log_artifact_mock.assert_called_once()
+    mlflow_log_artifact_mock.assert_called_once() # perf data exported in json
+    mlflow_log_figure_mock.assert_called_once() # perf plot
 
 
 class FailingSingleNodeScript(SingleNodeScript):
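Reviewer note on the @patch stack above: unittest.mock applies stacked decorators bottom-up, so the decorator closest to the function supplies the first mock argument; that is why mlflow_log_figure_mock lands between mlflow_log_artifact_mock and mlflow_end_run_mock in the test signature. A self-contained illustration (os functions used purely as stand-in patch targets):

    from unittest.mock import patch

    @patch('os.remove')  # outermost decorator -> last mock argument
    @patch('os.rename')
    @patch('os.mkdir')   # innermost decorator -> first mock argument
    def check(mkdir_mock, rename_mock, remove_mock):
        import os
        os.mkdir('a')        # intercepted by mkdir_mock, filesystem untouched
        os.rename('a', 'b')  # intercepted by rename_mock
        os.remove('b')       # intercepted by remove_mock
        mkdir_mock.assert_called_once()
        rename_mock.assert_called_once()
        remove_mock.assert_called_once()

    check()

The same rule explains the bug in the next file: a duplicated @patch('mlflow.log_artifact') still injects its own argument, so mlflow_log_figure_mock silently receives a second log_artifact mock — which is what [PATCH 6/6] later corrects.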
diff --git a/tests/common/test_distributed.py b/tests/common/test_distributed.py
index 23424af1..561212eb 100644
--- a/tests/common/test_distributed.py
+++ b/tests/common/test_distributed.py
@@ -27,11 +27,12 @@ def run(self, args, logger, metrics_logger, unknown_args):
 
 @patch('mlflow.end_run')
 @patch('mlflow.log_artifact')
+@patch('mlflow.log_artifact')
 @patch('mlflow.log_metric')
 @patch('mlflow.set_tags')
 @patch('mlflow.start_run')
 @patch('common.distributed.MultiNodeMPIDriver')
-def test_multi_node_script(mpi_driver_mock, mlflow_start_run_mock, mlflow_set_tags_mock, mlflow_log_metric_mock, mlflow_log_artifact_mock, mlflow_end_run_mock):
+def test_multi_node_script(mpi_driver_mock, mlflow_start_run_mock, mlflow_set_tags_mock, mlflow_log_metric_mock, mlflow_log_artifact_mock, mlflow_log_figure_mock, mlflow_end_run_mock):
     # fake mpi initialization + config
     mpi_driver_mock().get_multinode_config.return_value = multinode_config_class(
         1, # world_size
@@ -65,7 +66,8 @@ def test_multi_node_script(mpi_driver_mock, mlflow_start_run_mock, mlflow_set_ta
         mlflow_log_metric_mock
     )
 
-    mlflow_log_artifact_mock.assert_called_once()
+    mlflow_log_artifact_mock.assert_called_once() # perf data exported in json
+    mlflow_log_figure_mock.assert_called_once() # perf plot
 
 
 class FailingMultiNodeScript(MultiNodeScript):
From 805ce93bbd5ba1b482274c8f7fc2309a32b78213 Mon Sep 17 00:00:00 2001
From: Jeff Omhover
Date: Fri, 11 Feb 2022 15:03:22 -0800
Subject: [PATCH 5/6] update tests

---
 src/common/perf.py               | 6 +++++-
 tests/common/test_component.py   | 2 +-
 tests/common/test_distributed.py | 2 +-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/src/common/perf.py b/src/common/perf.py
index 310e2146..906b51dc 100644
--- a/src/common/perf.py
+++ b/src/common/perf.py
@@ -197,6 +197,7 @@ class PerfReportPlotter():
     def __init__(self, metrics_logger):
         self.all_reports = {}
         self.metrics_logger = metrics_logger
+        self.logger = logging.getLogger(__name__)
 
     def save_to(self, perf_report_file_path=None):
         """Saves all reports into a json file"""
@@ -234,7 +235,6 @@ def diff_at_partitions(self, a, p, t):
             partitioned_diff = []
             partitioned_time = []
             index_partitions = np.array_split(np.arange(len(a)), p)
-            print(f"i={index_partitions}")
             for part in index_partitions:
                 partitioned_diff.append(a[part[-1]] - a[part[0]]) # diff between last and first value of partition
                 partitioned_time.append(t[part[0]])
@@ -374,6 +374,10 @@ def plot_all_perf(self, node, timestamps, disk_io_read, disk_io_write, net_io_ex
 
         NOTE: we're expecting all arrays to have the same length.
         """
+        if len(timestamps) < 3:
+            self.logger.warning(f"Not enough perf data points ({len(timestamps)}); skipping the perf plot.")
+            return
+
         timestamps = timestamps - timestamps[0]
         disk_io_read = disk_io_read - disk_io_read[0] # cumsum starting at 0
         disk_io_write = disk_io_write - disk_io_write[0] # cumsum starting at 0
diff --git a/tests/common/test_component.py b/tests/common/test_component.py
index 6d3bb9ea..5c7655cb 100644
--- a/tests/common/test_component.py
+++ b/tests/common/test_component.py
@@ -99,7 +99,7 @@ def __init__(self):
     def run(self, args, logger, metrics_logger, unknown_args):
         # don't do anything
         with metrics_logger.log_time_block("fake_time_block", step=1):
-            time.sleep(1)
+            time.sleep(5)
 
 @patch('mlflow.end_run')
 @patch('mlflow.log_figure')
diff --git a/tests/common/test_distributed.py b/tests/common/test_distributed.py
index 561212eb..79ff6c1d 100644
--- a/tests/common/test_distributed.py
+++ b/tests/common/test_distributed.py
@@ -22,7 +22,7 @@ def __init__(self):
     def run(self, args, logger, metrics_logger, unknown_args):
         # don't do anything
         with metrics_logger.log_time_block("fake_time_block", step=1):
-            time.sleep(1)
+            time.sleep(5)
 
 
 @patch('mlflow.end_run')
From a41a699a8f18295d51243e23e7cca0a85e32a26e Mon Sep 17 00:00:00 2001
From: Jeff Omhover
Date: Fri, 11 Feb 2022 15:15:43 -0800
Subject: [PATCH 6/6] fix unit tests

---
 tests/common/test_distributed.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/common/test_distributed.py b/tests/common/test_distributed.py
index 79ff6c1d..80662093 100644
--- a/tests/common/test_distributed.py
+++ b/tests/common/test_distributed.py
@@ -26,7 +26,7 @@ def run(self, args, logger, metrics_logger, unknown_args):
 
 @patch('mlflow.end_run')
-@patch('mlflow.log_artifact')
+@patch('mlflow.log_figure')
 @patch('mlflow.log_artifact')
 @patch('mlflow.log_metric')
 @patch('mlflow.set_tags')
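Reviewer note: the tests patch 'mlflow.log_figure' directly, which suggests that metrics_logger.log_figure is a thin wrapper over that mlflow API (an assumption — the wrapper itself is not shown in this diff). A minimal usage sketch of the underlying call, assuming mlflow>=1.13 where mlflow.log_figure(figure, artifact_file) is available:

    import matplotlib
    matplotlib.use('agg')
    import matplotlib.pyplot as plt
    import mlflow

    with mlflow.start_run():
        fig, ax = plt.subplots()
        ax.plot([0, 1, 2], [10.0, 20.0, 15.0])
        # stores the figure as a run artifact under the given relative path
        mlflow.log_figure(fig, "perf_plot_node0.png")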