diff --git a/benchpress/benchmarks.yml b/benchpress/benchmarks.yml index 5400b78..2bc914e 100644 --- a/benchpress/benchmarks.yml +++ b/benchpress/benchmarks.yml @@ -26,3 +26,25 @@ silo: - latency: - avg_latency +minebench_kmeans: + parser: minebench_kmeans + path: ./benchmarks/minebench/time_wrap.sh + metrics: + - execution_time: + - real + - user + - sys + +minebench_plsa: + parser: minebench_plsa + path: ./benchmarks/minebench/plsa + metrics: + - execution_time: + - total_time + +minebench_rsearch: + parser: minebench_rsearch + path: ./benchmarks/minebench/rsearch/bin/rsearch + metrics: + - execution_time: + - total_time diff --git a/benchpress/benchpress/lib/job.py b/benchpress/benchpress/lib/job.py index 97ee85a..8b54763 100644 --- a/benchpress/benchpress/lib/job.py +++ b/benchpress/benchpress/lib/job.py @@ -91,7 +91,12 @@ def run(self): try: logger.info('Starting "{}"'.format(self.name)) - cmd = [self.binary] + self.args + safe_args = [] + for arg in self.args: + for sub_arg in arg.split(' '): + safe_args.append(sub_arg) + + cmd = [self.binary] + safe_args process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) diff --git a/benchpress/benchpress/plugins/parsers/__init__.py b/benchpress/benchpress/plugins/parsers/__init__.py index 1f04b61..c27041a 100644 --- a/benchpress/benchpress/plugins/parsers/__init__.py +++ b/benchpress/benchpress/plugins/parsers/__init__.py @@ -9,6 +9,9 @@ from .fio import FioParser from .generic import JSONParser from .ltp import LtpParser +from .minebench import KMeansParser +from .minebench import PLSAParser +from .minebench import RSearchParser from .returncode import ReturncodeParser from .schbench import SchbenchParser from .silo import SiloParser @@ -18,6 +21,9 @@ def register_parsers(factory): factory.register('fio', FioParser) factory.register('json', JSONParser) factory.register('ltp', LtpParser) + factory.register('minebench_kmeans', KMeansParser) + factory.register('minebench_plsa', PLSAParser) + 
factory.register('minebench_rsearch', RSearchParser) factory.register('returncode', ReturncodeParser) factory.register('schbench', SchbenchParser) factory.register('silo', SiloParser) diff --git a/benchpress/benchpress/plugins/parsers/minebench.py b/benchpress/benchpress/plugins/parsers/minebench.py new file mode 100644 index 0000000..7adaeb8 --- /dev/null +++ b/benchpress/benchpress/plugins/parsers/minebench.py @@ -0,0 +1,73 @@ +# Copyright (c) 2018-present, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. An additional grant +# of patent rights can be found in the PATENTS file in the same directory. + +import re + +from benchpress.lib.parser import Parser + + +def _minebench_regex_parser(regex, output): + m = re.search(regex, output) + return m.groupdict() if m else {} + + +def _field_map(d, keys, f): + return {k: f(d[k]) for k in keys if k in d} + + +class KMeansParser(Parser): + """Example output: + real 2.00 + user 1.50 + sys 0.02 + """ + + TIME_REGEX = ( + r'^real\s(?P<real>\d+\.\d+)' + r'user\s(?P<user>\d+\.\d+)' + r'sys\s(?P<sys>\d+\.\d+)' + ) + + def parse(self, stdout, stderr, returncode): + output = ''.join(stderr) + times = _minebench_regex_parser(KMeansParser.TIME_REGEX, output) + times = _field_map(times, ['real', 'user', 'sys'], float) + return {'execution_time': times} + + +class PLSAParser(Parser): + """Example output: + Forward time: 26.47s + BackwardFindPathsForHugeBlock Time: 7.60 + Second phase in backward period Time: 6.19 + + Success! 
+ Total time: 40.26s + """ + + PLSA_REGEX = r'Total\stime:\s(?P<total_time>\d+\.\d+)s' + + def parse(self, stdout, stderr, returncode): + output = ''.join(stdout) + times = _minebench_regex_parser(PLSAParser.PLSA_REGEX, output) + times = _field_map(times, ['total_time'], float) + return {'execution_time': times} + + +class RSearchParser(Parser): + """Example output: + we cost 199.2 seconds totally, 22.0 for making histogram + Fin + """ + + RSEARCH_REGEX = r'we\scost\s(?P<total_time>\d+\.\d+)\sseconds' + + def parse(self, stdout, stderr, returncode): + output = ''.join(stdout) + times = _minebench_regex_parser(RSearchParser.RSEARCH_REGEX, output) + times = _field_map(times, ['total_time'], float) + return {'execution_time': times} diff --git a/benchpress/install_minebench.sh b/benchpress/install_minebench.sh new file mode 100755 index 0000000..baff9d2 --- /dev/null +++ b/benchpress/install_minebench.sh @@ -0,0 +1,68 @@ +#!/bin/bash +set -e +set -x + +NU_MINEBENCH_VERSION='NU-MineBench-3.0.1' +NU_MINEBENCH_TAR_FILE="${NU_MINEBENCH_VERSION}.tar.gz" +NU_MINEBENCH_DOWNLOAD_URL="http://cucis.ece.northwestern.edu/projects/DMS" + +KMEANS_DATASET_TAR_FILE="kmeans.tar.gz" +PLSA_DATASET_TAR_FILE="PLSA.tar.gz" +RSEARCH_DATASET_TAR_FILE="rsearch.tar.gz" +NU_MINEBENCH_DATASETS="${KMEANS_DATASET_TAR_FILE} ${PLSA_DATASET_TAR_FILE} ${RSEARCH_DATASET_TAR_FILE}" + +BENCHMARKS_DIR="$(pwd)/benchmarks" +mkdir -p "${BENCHMARKS_DIR}" +mkdir -p "${BENCHMARKS_DIR}/minebench" +mkdir -p "${BENCHMARKS_DIR}/minebench/datasets" + + + +echo 'Downloading NU-MineBench and its datasets' +cd "${BENCHMARKS_DIR}/minebench/datasets/" +for dataset in $NU_MINEBENCH_DATASETS; do + wget "${NU_MINEBENCH_DOWNLOAD_URL}/DATASETS/$dataset" + tar -zxvf $dataset +done +cd "${BENCHMARKS_DIR}/.." 
+ +cp templates/time_wrap.sh "${BENCHMARKS_DIR}/minebench/" +chmod u+x "${BENCHMARKS_DIR}/minebench/time_wrap.sh" + +rm -rf build +mkdir -p build +cd build + +wget "${NU_MINEBENCH_DOWNLOAD_URL}/${NU_MINEBENCH_TAR_FILE}" +tar -xzvf "${NU_MINEBENCH_TAR_FILE}" +cd "$NU_MINEBENCH_VERSION" + + + +echo 'Compiling and Installing KMeans' +cd 'KMeans/' +make OPTFLAGS="-O3" example +cp 'example' "${BENCHMARKS_DIR}/minebench/kmeans" +cd ../ +echo 'Done installing KMeans' + +echo 'Compiling and Installing PLSA' +cd 'PLSA/' +make COMPILEOPTION='-g -Wno-write-strings -fopenmp -O3' -f Makefile.omp +cp 'parasw.mt' "${BENCHMARKS_DIR}/minebench/plsa" +cd ../ +echo 'Done installing PLSA' + +echo 'Compiling and Installing RSearch' +cd 'RSEARCH/' +./configure --prefix="${BENCHMARKS_DIR}/minebench/rsearch" +make CFLAGS="-O3 -fopenmp" +make install +cd ../ +echo 'Done installing RSearch' + +cd ../../ + +rm -rf build/ + +echo "NU-MineBench installed into ${BENCHMARKS_DIR}/minebench" diff --git a/benchpress/jobs/jobs.yml b/benchpress/jobs/jobs.yml index e745884..91b6f72 100644 --- a/benchpress/jobs/jobs.yml +++ b/benchpress/jobs/jobs.yml @@ -1,5 +1,5 @@ - benchmark: schbench - name: schbench default + name: schbench default description: defaults for schbench args: message-threads: 2 @@ -72,3 +72,40 @@ scale-factor: 1 runtime: 1 +- benchmark: minebench_kmeans + name: minebench_kmeans default + description: defaults for minebench_kmeans + args: + - 'benchmarks/minebench/kmeans' + - '-b' # binary input + - '-o' # output time + - '-p 32' # threads to use + - '-m 50' # max number of clusters + - '-n 4' # min number of clusters + - '-f' # fuzzy kmeans + - '-i benchmarks/minebench/datasets/kmeans/edge' # dataset to cluster + +- benchmark: minebench_plsa + name: minebench_plsa default + description: defaults for minebench_plsa + args: + - 'benchmarks/minebench/datasets/PLSA/30k_1.txt' + - 'benchmarks/minebench/datasets/PLSA/30k_2.txt' + - 'benchmarks/minebench/datasets/PLSA/pam120.bla' + - '600' + 
- '400' + - '3' + - '3' + - '1' + - '32' + +- benchmark: minebench_rsearch + name: minebench_rsearch default + description: defaults for minebench_rsearch + args: + - '-n 100000' + - '-c' + - '-E 100' + - '-m benchmarks/minebench/datasets/rsearch/matrices/RIBOSUM85-60.mat' + - 'benchmarks/minebench/datasets/rsearch/Queries/mir-40.stk' + - 'benchmarks/minebench/datasets/rsearch/Databasefile/100Kdb.fa' diff --git a/benchpress/templates/time_wrap.sh b/benchpress/templates/time_wrap.sh new file mode 100644 index 0000000..0b67d31 --- /dev/null +++ b/benchpress/templates/time_wrap.sh @@ -0,0 +1,4 @@ +#!/bin/bash +set -e + +/usr/bin/time -p "$@" diff --git a/benchpress/tests/data/plsa_output.txt b/benchpress/tests/data/plsa_output.txt new file mode 100644 index 0000000..ba8b4f7 --- /dev/null +++ b/benchpress/tests/data/plsa_output.txt @@ -0,0 +1,14 @@ +Parellel Smith Waterman Algorithm implementation for OpenMP +Threads Number=2 +The file 'benchmarks/minebench/datasets/PLSA/pam120.bla' was opened +The file 'benchmarks/minebench/datasets/PLSA/30k_1.txt' was opened +The file 'benchmarks/minebench/datasets/PLSA/30k_2.txt' was opened +Seqence(1) Length=30144, Sequence Length(2)=29696 + Adjust block height=17, block width=17 +score=101948 x=29567 y=29696 globalstart.i=3 globalstart.j=130 +Forward time: 29.22s +BackwardFindPathsForHugeBlock Time: 8.35 +Second phase in backward period Time: 6.12 + +Success! 
+Total time: 43.69s diff --git a/benchpress/tests/data/rsearch_output.txt b/benchpress/tests/data/rsearch_output.txt new file mode 100644 index 0000000..3480127 --- /dev/null +++ b/benchpress/tests/data/rsearch_output.txt @@ -0,0 +1,121 @@ +Random seed: 1524699649 +D scale of 2.0 +Matrix: RIBOSUM85-60 +Alpha: 10.00 +Beta: 5.00 +Alpha': 0.00 +Beta': 15.00 +Query file: benchmarks/minebench/datasets/rsearch/Queries/mir-40.stk +Database file: benchmarks/minebench/datasets/rsearch/Databasefile/100Kdb.fa + + +beginsc = 0.000000 +endsc = -15.000000 +Making histogram cost 24.8 seconds +Statistics calculated with simulation of 100 samples of length 388 +No partition points +GC = 0 lambda = 0.0000 mu = inf +GC = 1 lambda = 0.0000 mu = inf +GC = 2 lambda = 0.0000 mu = inf +GC = 3 lambda = 0.0000 mu = inf +GC = 4 lambda = 0.0000 mu = inf +GC = 5 lambda = 0.0000 mu = inf +GC = 6 lambda = 0.0000 mu = inf +GC = 7 lambda = 0.0000 mu = inf +GC = 8 lambda = 0.0000 mu = inf +GC = 9 lambda = 0.0000 mu = inf +GC = 10 lambda = 0.0000 mu = inf +GC = 11 lambda = 0.0000 mu = inf +GC = 12 lambda = 0.0000 mu = inf +GC = 13 lambda = 0.0000 mu = inf +GC = 14 lambda = 0.0000 mu = inf +GC = 15 lambda = 0.0000 mu = inf +GC = 16 lambda = 0.0000 mu = inf +GC = 17 lambda = 0.0000 mu = inf +GC = 18 lambda = 0.0000 mu = inf +GC = 19 lambda = 0.0000 mu = inf +GC = 20 lambda = 0.0000 mu = inf +GC = 21 lambda = 0.0000 mu = inf +GC = 22 lambda = 0.0000 mu = inf +GC = 23 lambda = 0.0000 mu = inf +GC = 24 lambda = 0.0000 mu = inf +GC = 25 lambda = 0.0000 mu = inf +GC = 26 lambda = 0.0000 mu = inf +GC = 27 lambda = 0.0000 mu = inf +GC = 28 lambda = 0.0000 mu = inf +GC = 29 lambda = 0.0000 mu = inf +GC = 30 lambda = 0.0000 mu = inf +GC = 31 lambda = 0.0000 mu = inf +GC = 32 lambda = 0.0000 mu = inf +GC = 33 lambda = 0.0000 mu = inf +GC = 34 lambda = 0.0000 mu = inf +GC = 35 lambda = 0.0000 mu = inf +GC = 36 lambda = 0.0000 mu = inf +GC = 37 lambda = 0.0000 mu = inf +GC = 38 lambda = 0.0000 mu = inf +GC = 39 
lambda = 0.0000 mu = inf +GC = 40 lambda = 0.0000 mu = inf +GC = 41 lambda = 0.0000 mu = inf +GC = 42 lambda = 0.0000 mu = inf +GC = 43 lambda = 0.0000 mu = inf +GC = 44 lambda = 0.0000 mu = inf +GC = 45 lambda = 0.0000 mu = inf +GC = 46 lambda = 0.0000 mu = inf +GC = 47 lambda = 0.0000 mu = inf +GC = 48 lambda = 0.0000 mu = inf +GC = 49 lambda = 0.0000 mu = inf +GC = 50 lambda = 0.0000 mu = inf +GC = 51 lambda = 0.0000 mu = inf +GC = 52 lambda = 0.0000 mu = inf +GC = 53 lambda = 0.0000 mu = inf +GC = 54 lambda = 0.0000 mu = inf +GC = 55 lambda = 0.0000 mu = inf +GC = 56 lambda = 0.0000 mu = inf +GC = 57 lambda = 0.0000 mu = inf +GC = 58 lambda = 0.0000 mu = inf +GC = 59 lambda = 0.0000 mu = inf +GC = 60 lambda = 0.0000 mu = inf +GC = 61 lambda = 0.0000 mu = inf +GC = 62 lambda = 0.0000 mu = inf +GC = 63 lambda = 0.0000 mu = inf +GC = 64 lambda = 0.0000 mu = inf +GC = 65 lambda = 0.0000 mu = inf +GC = 66 lambda = 0.0000 mu = inf +GC = 67 lambda = 0.0000 mu = inf +GC = 68 lambda = 0.0000 mu = inf +GC = 69 lambda = 0.0000 mu = inf +GC = 70 lambda = 0.0000 mu = inf +GC = 71 lambda = 0.0000 mu = inf +GC = 72 lambda = 0.0000 mu = inf +GC = 73 lambda = 0.0000 mu = inf +GC = 74 lambda = 0.0000 mu = inf +GC = 75 lambda = 0.0000 mu = inf +GC = 76 lambda = 0.0000 mu = inf +GC = 77 lambda = 0.0000 mu = inf +GC = 78 lambda = 0.0000 mu = inf +GC = 79 lambda = 0.0000 mu = inf +GC = 80 lambda = 0.0000 mu = inf +GC = 81 lambda = 0.0000 mu = inf +GC = 82 lambda = 0.0000 mu = inf +GC = 83 lambda = 0.0000 mu = inf +GC = 84 lambda = 0.0000 mu = inf +GC = 85 lambda = 0.0000 mu = inf +GC = 86 lambda = 0.0000 mu = inf +GC = 87 lambda = 0.0000 mu = inf +GC = 88 lambda = 0.0000 mu = inf +GC = 89 lambda = 0.0000 mu = inf +GC = 90 lambda = 0.0000 mu = inf +GC = 91 lambda = 0.0000 mu = inf +GC = 92 lambda = 0.0000 mu = inf +GC = 93 lambda = 0.0000 mu = inf +GC = 94 lambda = 0.0000 mu = inf +GC = 95 lambda = 0.0000 mu = inf +GC = 96 lambda = 0.0000 mu = inf +GC = 97 lambda = 0.0000 mu = inf 
+GC = 98 lambda = 0.0000 mu = inf +GC = 99 lambda = 0.0000 mu = inf +GC = 100 lambda = 0.0000 mu = inf +N = 235320 +Using E cutoff of 10.00 +we cost 225.7 seconds totally, 24.8 for making histogram +Fin diff --git a/benchpress/tests/test_minebench_parsers.py b/benchpress/tests/test_minebench_parsers.py new file mode 100644 index 0000000..4f20888 --- /dev/null +++ b/benchpress/tests/test_minebench_parsers.py @@ -0,0 +1,72 @@ +# Copyright (c) 2018-present, Facebook, Inc. +# All rights reserved. +# +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. An additional grant +# of patent rights can be found in the PATENTS file in the same directory. + +import os +import unittest + +from benchpress.plugins.parsers.minebench import ( + KMeansParser, + PLSAParser, + RSearchParser, +) + + +class TestKMeansParser(unittest.TestCase): + def setUp(self): + self.stderr = [ + 'real 2.00', + 'user 1.50', + 'sys 0.02', + ] + self.parser = KMeansParser() + + def test_parse_expected_output(self): + metrics = self.parser.parse(None, self.stderr, 0) + self.assertTrue('execution_time' in metrics) + self.assertDictEqual({ + 'real': 2.0, + 'user': 1.5, + 'sys': 0.02, + }, metrics['execution_time']) + + +class TestPLSAParser(unittest.TestCase): + def setUp(self): + output_path = os.path.join( + os.path.dirname(__file__), + 'data', + 'plsa_output.txt' + ) + with open(output_path, 'r') as f: + self.stdout = f.readlines() + self.parser = PLSAParser() + + def test_parse_expected_output(self): + metrics = self.parser.parse(self.stdout, None, 0) + self.assertTrue('execution_time' in metrics) + self.assertDictEqual({'total_time': 43.69}, metrics['execution_time']) + + +class TestRSearchParser(unittest.TestCase): + def setUp(self): + output_path = os.path.join( + os.path.dirname(__file__), + 'data', + 'rsearch_output.txt' + ) + with open(output_path, 'r') as f: + self.stdout = f.readlines() + self.parser = RSearchParser() 
+ + def test_parse_expected_output(self): + metrics = self.parser.parse(self.stdout, None, 0) + self.assertTrue('execution_time' in metrics) + self.assertDictEqual({'total_time': 225.7}, metrics['execution_time']) + + +if __name__ == '__main__': + unittest.main()