From dc9f9a767751f953e5500ab95a1b692c13e1f921 Mon Sep 17 00:00:00 2001
From: Antti Kervinen
Date: Thu, 28 Nov 2019 11:41:41 +0200
Subject: [PATCH] scaling: add used memory to PDF report table

Currently the PDF report shows how much free memory scaling up
consumes. This number is not comparable between cluster nodes, or even
between test runs, because RAM used for OS caches/buffers/slab is
counted as consumed. As a consequence, the reported figure depends
heavily on the initial memory conditions of a node rather than on the
memory actually used by Kubernetes and the pods.

This patch adds "memory used" to the report in order to provide a less
node-dependent and more reproducible memory figure.

Using the "MemAvailable" value from /proc/meminfo was also tried, but
it varies almost as much as the "MemFree" value that is currently
reported.

Signed-off-by: Antti Kervinen
---
 .../report_dockerfile/collectd_scaling.R | 87 +++++++++++++++----
 .../report_dockerfile/metrics_report.Rmd |  2 +-
 2 files changed, 70 insertions(+), 19 deletions(-)

diff --git a/metrics/report/report_dockerfile/collectd_scaling.R b/metrics/report/report_dockerfile/collectd_scaling.R
index 88e65bf7..9b657f51 100755
--- a/metrics/report/report_dockerfile/collectd_scaling.R
+++ b/metrics/report/report_dockerfile/collectd_scaling.R
@@ -20,6 +20,7 @@ testnames=c(
 podbootdata=c()     # Track per-launch data
 cpuidledata=c()     # Track cpu idle data per nodes
 memfreedata=c()     # Track mem free data for nodes
+memuseddata=c()     # Track mem used data for nodes
 inodefreedata=c()   # Track inode free data for nodes
 ifpacketdata=c()    # Track interface packet data for nodes
 ifoctetdata=c()     # Track interface octets data for nodes
@@ -104,11 +105,13 @@ for (currentdir in resultdirs) {
     # Get a list of all the nodes from the schedule data list
     nodes=names(node_sched_data)
 
-    memtotal=0
+    memfreedelta_total=0
+    memuseddelta_total=0
     cputotal=0
     inodetotal=0
     cpu_idle_data=c()
     mem_free_data=c()
+    mem_used_data=c()
     inode_free_data=c()
     interface_packets_data=c()
     interface_octets_data=c()
@@ -127,7 +130,7 @@ for (currentdir in resultdirs) {
         localhost_dir=paste(node_dir, "localhost", sep="/")
 
         # grab memory data
-        memory_dir=paste(localhost_dir, "memory", sep="/")
+        memory_dir=paste(localhost_dir, "memory", sep="/")
         # filename has date on the end, so look for the right file name
         freemem_pattern='^memory\\-free'
         files=list.files(memory_dir, pattern=freemem_pattern)
@@ -136,13 +139,26 @@ for (currentdir in resultdirs) {
             mem_free_csv=paste(memory_dir, file, sep="/")
             node_mem_free_data=read.csv(mem_free_csv, header=TRUE, sep=",")
             node_mem_free_data=cbind(node_mem_free_data,
-                    node=rep(n, length(node_mem_free_data$value)))
+                node=rep(n, length(node_mem_free_data$value)))
             node_mem_free_data=cbind(node_mem_free_data,
-                    testname=rep(testname, length(node_mem_free_data$value)))
+                testname=rep(testname, length(node_mem_free_data$value)))
             node_mem_free_data$s_offset = node_mem_free_data$epoch - local_bootdata[1,]$epoch
-
             mem_free_data=rbind(mem_free_data, node_mem_free_data)
         }
+        # filename has date on the end, so look for the right file name
+        usedmem_pattern='^memory\\-used'
+        files=list.files(memory_dir, pattern=usedmem_pattern)
+        # collectd csv plugin starts a new file for each day of data collected
+        for(file in files) {
+            mem_used_csv=paste(memory_dir, file, sep="/")
+            node_mem_used_data=read.csv(mem_used_csv, header=TRUE, sep=",")
+            node_mem_used_data=cbind(node_mem_used_data,
+                node=rep(n, length(node_mem_used_data$value)))
+            node_mem_used_data=cbind(node_mem_used_data,
+                testname=rep(testname, length(node_mem_used_data$value)))
+            node_mem_used_data$s_offset = node_mem_used_data$epoch - local_bootdata[1,]$epoch
+            mem_used_data=rbind(mem_used_data, node_mem_used_data)
+        }
 
         # grab CPU data
         cpu_dir=paste(localhost_dir, "aggregation-cpu-average", sep="/")
@@ -273,6 +289,8 @@ for (currentdir in resultdirs) {
         end_time=local_bootdata$epoch[length(local_bootdata$epoch)]
 
         # get value closest to first pod launch
+        # memory-free and memory-used data share exactly the same timestamps,
+        # so the same start/end indexes work for both.
         mem_start_index=Position(function(x) x > start_time, node_mem_free_data$epoch)
         # take the reading previous to the index as long as a valid index
         if (is.na(mem_start_index)) {
@@ -281,6 +299,7 @@
             mem_start_index = mem_start_index - 1
         }
         max_free_mem=node_mem_free_data$value[mem_start_index]
+        min_used_mem=node_mem_used_data$value[mem_start_index]
 
         # get value closest to last pod launch
         mem_end_index=Position(function(x) x > end_time, node_mem_free_data$epoch)
@@ -291,8 +310,10 @@
             mem_end_index = mem_end_index - 1
         }
         min_free_mem=node_mem_free_data$value[mem_end_index]
+        max_used_mem=node_mem_used_data$value[mem_end_index]
 
-        memtotal = memtotal + (max_free_mem - min_free_mem)
+        memfreedelta_total = memfreedelta_total + (max_free_mem - min_free_mem)
+        memuseddelta_total = memuseddelta_total + (max_used_mem - min_used_mem)
 
         # get value closest to first pod launch
         cpu_start_index=Position(function(x) x > start_time, node_cpu_idle_data$epoch)
@@ -342,17 +363,21 @@ for (currentdir in resultdirs) {
     num_pods = local_bootdata$n_pods[length(local_bootdata$n_pods)]
 
     # We get data in b, but want the graphs in Gb.
-    memtotal = memtotal / (1024*1024*1024)
-    gb_per_pod = memtotal/num_pods
-    pod_per_gb = 1/gb_per_pod
+    memfreedelta_total = memfreedelta_total / (1024*1024*1024)
+    memuseddelta_total = memuseddelta_total / (1024*1024*1024)
+    gb_nonfree_per_pod = memfreedelta_total/num_pods
+    gb_used_per_pod = memuseddelta_total/num_pods
+    pod_per_nonfree_gb = 1/gb_nonfree_per_pod
+    pod_per_used_gb = 1/gb_used_per_pod
 
     # Memory usage stats.
     local_mems = c(
         "Test"=testname,
         "n"=num_pods,
-        "Tot_Gb"=round(memtotal, 3),
-        "avg_Gb"=round(gb_per_pod, 4),
-        "n_per_Gb"=round(pod_per_gb, 2)
+        "Free_GB_delta"=round(memfreedelta_total, 3),
+        "Used_GB_delta"=round(memuseddelta_total, 3),
+        "n_per_nonfree_GB"=round(pod_per_nonfree_gb, 2),
+        "n_per_used_GB"=round(pod_per_used_gb, 2)
     )
     memstats=rbind(memstats, local_mems)
 
@@ -393,6 +418,7 @@ for (currentdir in resultdirs) {
     podbootdata=rbind(podbootdata, local_bootdata, make.row.names=FALSE)
     cpuidledata=rbind(cpuidledata, cpu_idle_data)
     memfreedata=rbind(memfreedata, mem_free_data)
+    memuseddata=rbind(memuseddata, mem_used_data)
    inodefreedata=rbind(inodefreedata, inode_free_data)
     ifpacketdata=rbind(ifpacketdata, interface_packets_data)
     ifoctetdata=rbind(ifoctetdata, interface_octets_data)
@@ -404,6 +430,7 @@
 # It's nice to show the graphs in Gb, at least for any decent sized test
 # run, so make a new column with that pre-divided data in it for us to use.
 memfreedata$mem_free_gb = memfreedata$value/(1024*1024*1024)
+memuseddata$mem_used_gb = memuseddata$value/(1024*1024*1024)
 
 # And show the boot times in seconds, not ms
 podbootdata$launch_time_s = podbootdata$launch_time/1000.0
@@ -414,8 +441,9 @@ mem_stats_plot = suppressWarnings(ggtexttable(data.frame(memstats),
     rows=NULL
     ))
 
-mem_scale = (max(memfreedata$value) / (1024*1024*1024)) / max(podbootdata$n_pods)
-mem_line_plot <- ggplot() +
+mem_free_scale = (max(memfreedata$value) / (1024*1024*1024)) / max(podbootdata$n_pods)
+mem_used_scale = (max(memuseddata$value) / (1024*1024*1024)) / max(podbootdata$n_pods)
+mem_free_line_plot <- ggplot() +
     geom_line(data=memfreedata,
         aes(s_offset, mem_free_gb, colour=interaction(testname, node),
             group=interaction(testname, node)),
@@ -425,21 +453,44 @@ mem_line_plot <- ggplot() +
             group=interaction(testname, node)),
         alpha=0.5, size=0.5) +
     geom_line(data=podbootdata,
-        aes(x=s_offset, y=n_pods*mem_scale, colour=interaction(testname,"pod count"), group=testname),
+        aes(x=s_offset, y=n_pods*mem_free_scale, colour=interaction(testname,"pod count"), group=testname),
         alpha=0.2) +
     geom_point(data=podbootdata,
-        aes(x=s_offset, y=n_pods*mem_scale, colour=interaction(testname,"pod count"), group=testname),
+        aes(x=s_offset, y=n_pods*mem_free_scale, colour=interaction(testname,"pod count"), group=testname),
         alpha=0.3, size=0.5) +
     labs(colour="") +
     xlab("seconds") +
     ylab("System Avail (Gb)") +
-    scale_y_continuous(labels=comma, sec.axis=sec_axis(~ ./mem_scale, name="pods")) +
+    scale_y_continuous(labels=comma, sec.axis=sec_axis(~ ./mem_free_scale, name="pods")) +
     ggtitle("System Memory free") +
     theme(legend.position="bottom") +
     theme(axis.text.x=element_text(angle=90))
 
+mem_used_line_plot <- ggplot() +
+    geom_line(data=memuseddata,
+        aes(s_offset, mem_used_gb, colour=interaction(testname, node),
+            group=interaction(testname, node)),
+        alpha=0.3) +
+    geom_point(data=memuseddata,
+        aes(s_offset, mem_used_gb, colour=interaction(testname, node),
+            group=interaction(testname, node)),
+        alpha=0.5, size=0.5) +
+    geom_line(data=podbootdata,
+        aes(x=s_offset, y=n_pods*mem_used_scale, colour=interaction(testname,"pod count"), group=testname),
+        alpha=0.2) +
+    geom_point(data=podbootdata,
+        aes(x=s_offset, y=n_pods*mem_used_scale, colour=interaction(testname,"pod count"), group=testname),
+        alpha=0.3, size=0.5) +
+    labs(colour="") +
+    xlab("seconds") +
+    ylab("System Used (Gb)") +
+    scale_y_continuous(labels=comma, sec.axis=sec_axis(~ ./mem_used_scale, name="pods")) +
+    ggtitle("System Memory used, not counting Cached, Buffered and SLAB") +
+    theme(axis.text.x=element_text(angle=90))
+
 page1 = grid.arrange(
-    mem_line_plot,
+    mem_free_line_plot,
+    mem_used_line_plot,
     mem_stats_plot,
     ncol=1
     )
diff --git a/metrics/report/report_dockerfile/metrics_report.Rmd b/metrics/report/report_dockerfile/metrics_report.Rmd
index 630365ac..2af629a5 100644
--- a/metrics/report/report_dockerfile/metrics_report.Rmd
+++ b/metrics/report/report_dockerfile/metrics_report.Rmd
@@ -40,7 +40,7 @@ source('parallel.R')
 # Runtime scaling rapid
 
 This [test](https://github.com/clearlinux/cloud-native-setup/metrics/scaling/k8s_scale_fast.sh)
-uses collectd to asynchronously measure CPU idle %, free memory, pod boot time, free inodes,
+uses collectd to asynchronously measure CPU idle %, free and used memory, pod boot time, free inodes,
 and interface stats as it launches more and more idle `busybox` pods on a Kubernetes cluster.
 
 > Note: CPU % is measured as a system whole - 100% represents *all* CPUs on the node.
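
A note on how the new figure relates to /proc/meminfo: the plot title above describes "used" as
memory not counting Cached, Buffered and SLAB. The short R sketch below is not part of the patch;
it only approximates that accounting directly from /proc/meminfo as a sanity check against the
collectd memory-used data files, and the exact formula applied by the collectd memory plugin may
differ between plugin versions.

# Approximate "used" memory (GB) from /proc/meminfo, excluding caches,
# buffers and slab. Illustrative only; collectd's own accounting is
# authoritative for the values in the report.
meminfo_used_gb <- function(path="/proc/meminfo") {
    lines <- readLines(path)
    # Each line looks like "MemTotal:       16318540 kB"
    fields <- strsplit(lines, "[: ]+")
    kb <- setNames(as.numeric(sapply(fields, "[", 2)),
                   sapply(fields, "[", 1))
    used_kb <- kb["MemTotal"] - kb["MemFree"] - kb["Buffers"] -
               kb["Cached"] - kb["Slab"]
    unname(used_kb) / (1024 * 1024)
}

meminfo_used_gb()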
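
For reference, the new table columns reduce to simple arithmetic on the per-node deltas: each node
contributes the difference between the used-memory reading just before the first pod launch and the
reading just before the last recorded launch, the deltas are summed over nodes, converted from bytes
to GB and normalised by the final pod count. The toy example below uses made-up node names, deltas
and pod count purely to show the shape of that calculation.

# Hypothetical per-node used-memory deltas (bytes) and final pod count.
used_delta_bytes <- c(node1=6.4e9, node2=5.9e9)
num_pods <- 200

memuseddelta_total <- sum(used_delta_bytes) / (1024*1024*1024)   # "Used_GB_delta"
gb_used_per_pod <- memuseddelta_total / num_pods
pod_per_used_gb <- 1 / gb_used_per_pod                           # "n_per_used_GB"
round(c(Used_GB_delta=memuseddelta_total, n_per_used_GB=pod_per_used_gb), 2)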