From fb7aff7ace196344a5c5a7b562ae777d3f98ca8e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20Balogh?= Date: Wed, 22 Sep 2021 15:17:34 +0200 Subject: [PATCH 01/10] preparation for the Gong reimplementation --- framework/experiment/experiment_gong.py | 26 +++++++++++++++++++++---- framework/main.py | 3 ++- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/framework/experiment/experiment_gong.py b/framework/experiment/experiment_gong.py index a50f4b2..5f59805 100644 --- a/framework/experiment/experiment_gong.py +++ b/framework/experiment/experiment_gong.py @@ -1,3 +1,5 @@ +from random import randint + from framework.context.context import Context from framework.core.answer import Answer from framework.core.mediator import Mediator @@ -16,20 +18,36 @@ class Questioner(Questioner): def __init__(self, context: Context): self.context = context + self.subject = None def next_subject(self): - pass + self.subject = sorted(self.context.code_element_set.code_elements, + key=lambda ce: (-ce.score, ce.name))[0] + return self.subject def acknowledge(self, answer: Answer): - pass + pass # TODO set the new score here class Oracle(Oracle): - def __init__(self, context: Context): self.context = context def ask_about(self, subject): - pass + if self.context.is_faulty(subject): + return Answer.YES + else: + neighbours = self.context.get_neighbours(subject) + faulty_neighbours = [ce for ce in neighbours if self.context.is_faulty(ce)] + + r = randint(0, 100) + + if r > self.context.knowledge: + return Answer.NO + else: + if len(faulty_neighbours) == 0: + return Answer.NO_AND_NOT_SUSPICIOUS + else: + return Answer.NO_BUT_SUSPICIOUS def configure(self): questioner = self.Questioner(self.context) diff --git a/framework/main.py b/framework/main.py index 1b54588..fb1546b 100755 --- a/framework/main.py +++ b/framework/main.py @@ -6,6 +6,7 @@ from framework.context.code_element import CodeElement from framework.context.context import Defects4JContext from framework.context.context import SIRContext +from framework.experiment.experiment_gong import ExperimentGong from framework.experiment.experiment_one import Experiment from framework.experiment.experiment_one import ExperimentOne from framework.experiment.experiment_one import ExperimentOneB @@ -40,7 +41,7 @@ def jobs(): def do(job): - e = ExperimentOneB("ex1b", Defects4JContext) + e = ExperimentGong("expgong", Defects4JContext) e.run(args.datadir, j(args.outdir, args.score, str(job[0]), str(job[1]), str(job[2])), knowledge=job[0], confidence=job[2]) From c47a5bf3b3ec014ee60ea3469238a6fd7236f0a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20Balogh?= Date: Wed, 22 Sep 2021 16:04:25 +0200 Subject: [PATCH 02/10] skeleton for root cause calculation --- framework/core/answer.py | 2 ++ framework/experiment/experiment_gong.py | 31 +++++++++++++++++-------- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/framework/core/answer.py b/framework/core/answer.py index 2f273e7..41733ed 100644 --- a/framework/core/answer.py +++ b/framework/core/answer.py @@ -5,8 +5,10 @@ class Answer(Enum): # Subject is faulty YES = 1 + FAULTY = 1 # introduced for Gong experiment # Subject is not faulty NO = 2 + CLEAN = 2 # introduced for Gong experiment # Subject is faulty but its neighbours are suspicious NO_BUT_SUSPICIOUS = 3 # Neither the subject, nor its neighbours are faulty diff --git a/framework/experiment/experiment_gong.py b/framework/experiment/experiment_gong.py index 5f59805..67aee8d 100644 --- a/framework/experiment/experiment_gong.py +++ b/framework/experiment/experiment_gong.py @@ -1,5 +1,7 @@ from random import randint +from typing import Dict +from framework.context.code_element import CodeElement from framework.context.context import Context from framework.core.answer import Answer from framework.core.mediator import Mediator @@ -8,6 +10,12 @@ from framework.experiment.experiment import Experiment +def root_likelihood(code_element, context) -> float: + tests = context.coverage_matrix.get_tests(with_result=True) + raise NotImplementedError() + return 42 + + class ExperimentGong(Experiment): """ Experiment implementing the algorithm proposed by @@ -26,7 +34,16 @@ def next_subject(self): return self.subject def acknowledge(self, answer: Answer): - pass # TODO set the new score here + if answer == Answer.CLEAN: + code_elements = self.context.code_element_set.code_elements - {self.subject} + causes: Dict[CodeElement, float] = {} + for code_element in code_elements: + causes[code_element] = root_likelihood(code_element, self.context) + elif answer == Answer.FAULTY: + print("Doing nothing, going to stop anyway.") + else: + raise ValueError('Gong experiment do not use code-context.') + class Oracle(Oracle): def __init__(self, context: Context): @@ -34,20 +51,14 @@ def __init__(self, context: Context): def ask_about(self, subject): if self.context.is_faulty(subject): - return Answer.YES + return Answer.FAULTY else: - neighbours = self.context.get_neighbours(subject) - faulty_neighbours = [ce for ce in neighbours if self.context.is_faulty(ce)] - r = randint(0, 100) if r > self.context.knowledge: - return Answer.NO + return Answer.FAULTY else: - if len(faulty_neighbours) == 0: - return Answer.NO_AND_NOT_SUSPICIOUS - else: - return Answer.NO_BUT_SUSPICIOUS + return Answer.CLEAN def configure(self): questioner = self.Questioner(self.context) From 1c5d001ada26d628500c6597cab0cfd8e82bfcac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20Balogh?= Date: Thu, 23 Sep 2021 16:31:28 +0200 Subject: [PATCH 03/10] multi-purpose query feature for coverage graph --- .gitignore | 1 + framework/context/coverage.py | 59 +++++++++++++++++++++++-- framework/experiment/experiment_gong.py | 9 +++- framework/main.py | 17 +++++-- 4 files changed, 76 insertions(+), 10 deletions(-) diff --git a/.gitignore b/.gitignore index 7616e48..6037903 100644 --- a/.gitignore +++ b/.gitignore @@ -102,3 +102,4 @@ ENV/ # PyCharm .idea +/framework/coverage.dump.graphml diff --git a/framework/context/coverage.py b/framework/context/coverage.py index 4bb8373..665ac8f 100644 --- a/framework/context/coverage.py +++ b/framework/context/coverage.py @@ -2,11 +2,13 @@ import re import tarfile from functools import lru_cache -from typing import List +from typing import List, Optional, Union, Tuple +import networkx import networkx as nx import trc2chains +from framework.context.code_element import CodeElement class CoverageMatrix(object): @@ -70,7 +72,8 @@ class TraceCoverageMatrix(object): def __init__(self, granularity: str): self.granularity = granularity self.graph = nx.Graph() - self.mapping = {} + self.key_mapping = {} + self.name_mapping = {} def _add_test(self, member: tarfile.TarInfo): file_name = os.path.basename(member.name) @@ -85,7 +88,7 @@ def _add_test(self, member: tarfile.TarInfo): return test_name def _add_code_element(self, code_element: str): - self.graph.add_node(code_element, type='code_element') + self.graph.add_node(code_element, type='code_element', name=self.key_mapping[code_element]) def _add_coverage(self, test: str, code_element: str, **attr): self._add_code_element(code_element) @@ -126,7 +129,8 @@ def create_mapping(self, archive: tarfile.TarFile, member: tarfile.TarInfo): id = int(parts[0]) name = parts[1] - self.mapping[id] = name + self.key_mapping[id] = name + self.name_mapping[name] = id @lru_cache(maxsize=2) def get_code_elements(self, with_result=True): @@ -154,6 +158,53 @@ def get_tests(self, with_result=True): return result + OR_OPERATOR = 'or' + AND_OPERATOR = 'and' + + @lru_cache(maxsize=2) + def query( + self, + operator: str, + test_result: Optional[str] = None, + type_of: Optional[str] = None, + neighbor_of_every: Tuple[Union[CodeElement, str, int], ...] = (), + neighbor_of_any: Tuple[Union[CodeElement, str, int], ...] = () + ): + result = [] + + for node, data in self.graph.nodes(data=True): + type_filter = type_of is None or data.get('type', None) == type_of + result_filter = test_result is None or data.get('result', None) == test_result + neighbors = self.graph.neighbors(node) + neighbor_every_filter =\ + neighbor_of_every == () or all([n in neighbors for n in self.get_graph_key(*neighbor_of_every)]) + neighbor_any_filter =\ + neighbor_of_any == () or any([n in neighbors for n in self.get_graph_key(*neighbor_of_any)]) + + if operator == TraceCoverageMatrix.OR_OPERATOR: + if type_filter or result_filter or neighbor_every_filter or neighbor_any_filter: + result.append((node, data)) + elif operator == TraceCoverageMatrix.AND_OPERATOR: + if type_filter and result_filter and neighbor_every_filter and neighbor_any_filter: + result.append((node, data)) + else: + raise ValueError(f'unsupported operator: {operator}') + + return result + + def get_graph_key(self, *items: Union[CodeElement, str, int]): + keys = [] + for item in items: + if isinstance(item, CodeElement): + keys.append(self.name_mapping[item.name]) + elif isinstance(item, str): + keys.append(self.name_mapping[item]) + elif isinstance(item, int): + keys.append(item) + else: + raise ValueError("unsupported type") + return keys + @classmethod def load_from_file(cls, file_path, granularity='binary'): coverage = TraceCoverageMatrix(granularity) diff --git a/framework/experiment/experiment_gong.py b/framework/experiment/experiment_gong.py index 67aee8d..3ab84c2 100644 --- a/framework/experiment/experiment_gong.py +++ b/framework/experiment/experiment_gong.py @@ -3,6 +3,7 @@ from framework.context.code_element import CodeElement from framework.context.context import Context +from framework.context.coverage import TraceCoverageMatrix from framework.core.answer import Answer from framework.core.mediator import Mediator from framework.core.oracle import Oracle @@ -11,7 +12,12 @@ def root_likelihood(code_element, context) -> float: - tests = context.coverage_matrix.get_tests(with_result=True) + tests = context.coverage_matrix.query( + TraceCoverageMatrix.AND_OPERATOR, + test_result='PASS', + type_of='test', + neighbor_of_every=(code_element,) + ) raise NotImplementedError() return 42 @@ -44,7 +50,6 @@ def acknowledge(self, answer: Answer): else: raise ValueError('Gong experiment do not use code-context.') - class Oracle(Oracle): def __init__(self, context: Context): self.context = context diff --git a/framework/main.py b/framework/main.py index fb1546b..bee3061 100755 --- a/framework/main.py +++ b/framework/main.py @@ -20,6 +20,9 @@ parser.add_argument("-s", "--score", choices=["dstar", "ochiai", "tarantula"], default="tarantula", help="short name of the algorithm that is used to calculate the score") parser.add_argument("-t", "--threads", default=cpu_count() - 1, help="number of usable threads") +# introduced because concurrence breaks debugging traces, and makes debugging exceedingly difficult +parser.add_argument("-nc", "--no_concurrence", default=False, action="store_true", + help="remove any concurrence execution") args = parser.parse_args() @@ -46,7 +49,13 @@ def do(job): if __name__ == '__main__': - pool = Pool(processes=int(args.threads)) - pool.map(do, jobs()) - pool.close() - pool.join() + if args.no_concurrence: + print("running sequentially") + for job in jobs(): + do(job) + else: + print("running concurrently") + pool = Pool(processes=int(args.threads)) + pool.map(do, jobs()) + pool.close() + pool.join() From 3cc39abe3fd469acd9eabf3b9c616c7b0f8d9e22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20Balogh?= Date: Mon, 27 Sep 2021 17:10:26 +0200 Subject: [PATCH 04/10] root cause probability calculation, but still need to check --- framework/context/coverage.py | 9 +++++++++ framework/experiment/experiment_gong.py | 21 +++++++++++++++------ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/framework/context/coverage.py b/framework/context/coverage.py index 665ac8f..473db7f 100644 --- a/framework/context/coverage.py +++ b/framework/context/coverage.py @@ -220,4 +220,13 @@ def load_from_file(cls, file_path, granularity='binary'): coverage.parse_traces(archive, trace_files) + tests = coverage.query( + TraceCoverageMatrix.AND_OPERATOR, + type_of='test' + ) + for test in tests: + test_name = test[0] + coverage.key_mapping[test_name] = test_name + coverage.name_mapping[test_name] = test_name + return coverage diff --git a/framework/experiment/experiment_gong.py b/framework/experiment/experiment_gong.py index 3ab84c2..65a43e3 100644 --- a/framework/experiment/experiment_gong.py +++ b/framework/experiment/experiment_gong.py @@ -11,15 +11,24 @@ from framework.experiment.experiment import Experiment -def root_likelihood(code_element, context) -> float: +def root_likelihood(symptom, cause, context) -> float: + d = len(context.code_element_set.code_elements) tests = context.coverage_matrix.query( TraceCoverageMatrix.AND_OPERATOR, - test_result='PASS', + test_result='FAIL', type_of='test', - neighbor_of_every=(code_element,) + neighbor_of_every=(symptom, cause) ) - raise NotImplementedError() - return 42 + p = 0 + for t in tests: + t_name = t[0] + s_marks = context.coverage_matrix.query( + TraceCoverageMatrix.AND_OPERATOR, + type_of='code_element', + neighbor_of_every=(t_name,) + ) + p += d/len(s_marks) + return p class ExperimentGong(Experiment): @@ -44,7 +53,7 @@ def acknowledge(self, answer: Answer): code_elements = self.context.code_element_set.code_elements - {self.subject} causes: Dict[CodeElement, float] = {} for code_element in code_elements: - causes[code_element] = root_likelihood(code_element, self.context) + causes[code_element] = root_likelihood(self.subject, code_element, self.context) elif answer == Answer.FAULTY: print("Doing nothing, going to stop anyway.") else: From 49d42eba22a451a45785fd23d875f3ba35de65aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20Balogh?= Date: Tue, 28 Sep 2021 16:07:34 +0200 Subject: [PATCH 05/10] root cause probability calculation and most probably cause determined --- framework/experiment/experiment_gong.py | 8 +++++++- requirements.txt | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/framework/experiment/experiment_gong.py b/framework/experiment/experiment_gong.py index 65a43e3..7c4b8db 100644 --- a/framework/experiment/experiment_gong.py +++ b/framework/experiment/experiment_gong.py @@ -9,6 +9,7 @@ from framework.core.oracle import Oracle from framework.core.questioner import Questioner from framework.experiment.experiment import Experiment +import tqdm def root_likelihood(symptom, cause, context) -> float: @@ -52,13 +53,18 @@ def acknowledge(self, answer: Answer): if answer == Answer.CLEAN: code_elements = self.context.code_element_set.code_elements - {self.subject} causes: Dict[CodeElement, float] = {} - for code_element in code_elements: + progress_bar = tqdm.tqdm(code_elements, desc='cause probability', unit='code element') + for code_element in progress_bar: causes[code_element] = root_likelihood(self.subject, code_element, self.context) + root_cause = max(causes.items(), key=lambda item: item[1]) + print(f"the most probable root cause is {root_cause}") elif answer == Answer.FAULTY: print("Doing nothing, going to stop anyway.") else: raise ValueError('Gong experiment do not use code-context.') + print() + class Oracle(Oracle): def __init__(self, context: Context): self.context = context diff --git a/requirements.txt b/requirements.txt index d980720..e70d6b4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,5 @@ natsort==7.1.1 networkx==2.6.3 scipy==1.7.1 + +tqdm~=4.62.3 \ No newline at end of file From 03a0178ef631d9e458c108b2a3658b9d879e9a67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20Balogh?= Date: Wed, 29 Sep 2021 16:34:39 +0200 Subject: [PATCH 06/10] querying code elements or test if their did not have specific neighbors --- framework/__init__.py | 0 framework/context/coverage.py | 124 +++++++++++++++++++-- framework/experiment/experiment.py | 2 +- framework/experiment/experiment_gong.py | 43 +++++-- framework/fl/__init__.py | 0 framework/fl/algorithm.py | 0 framework/main.py | 2 + framework/util/__init__.py | 0 framework/{utils.py => util/os_helpers.py} | 0 requirements.txt | 3 +- sir-scripts/1_coverage_results.py | 2 +- sir-scripts/3_source_meter.py | 2 +- sir-scripts/5_score.py | 2 +- 13 files changed, 154 insertions(+), 26 deletions(-) create mode 100644 framework/__init__.py create mode 100644 framework/fl/__init__.py create mode 100644 framework/fl/algorithm.py create mode 100644 framework/util/__init__.py rename framework/{utils.py => util/os_helpers.py} (100%) mode change 100755 => 100644 diff --git a/framework/__init__.py b/framework/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/framework/context/coverage.py b/framework/context/coverage.py index 473db7f..db0f522 100644 --- a/framework/context/coverage.py +++ b/framework/context/coverage.py @@ -1,3 +1,4 @@ +import math import os import re import tarfile @@ -161,37 +162,42 @@ def get_tests(self, with_result=True): OR_OPERATOR = 'or' AND_OPERATOR = 'and' - @lru_cache(maxsize=2) + @lru_cache(maxsize=10000) def query( self, operator: str, test_result: Optional[str] = None, type_of: Optional[str] = None, neighbor_of_every: Tuple[Union[CodeElement, str, int], ...] = (), - neighbor_of_any: Tuple[Union[CodeElement, str, int], ...] = () + not_neighbor_of_every: Tuple[Union[CodeElement, str, int], ...] = (), + neighbor_of_any: Tuple[Union[CodeElement, str, int], ...] = (), + not_neighbor_of_any: Tuple[Union[CodeElement, str, int], ...] = () ): result = [] for node, data in self.graph.nodes(data=True): - type_filter = type_of is None or data.get('type', None) == type_of - result_filter = test_result is None or data.get('result', None) == test_result - neighbors = self.graph.neighbors(node) - neighbor_every_filter =\ - neighbor_of_every == () or all([n in neighbors for n in self.get_graph_key(*neighbor_of_every)]) - neighbor_any_filter =\ - neighbor_of_any == () or any([n in neighbors for n in self.get_graph_key(*neighbor_of_any)]) + neighbors = list(self.graph.neighbors(node)) + filters = ( + type_of is None or data.get('type', None) == type_of, + test_result is None or data.get('result', None) == test_result, + neighbor_of_every == () or all([n in neighbors for n in self.get_graph_key(*neighbor_of_every)]), + neighbor_of_any == () or any([n in neighbors for n in self.get_graph_key(*neighbor_of_any)]), + not_neighbor_of_every == () or all([n not in neighbors for n in self.get_graph_key(*not_neighbor_of_every)]), + not_neighbor_of_any == () or any([n not in neighbors for n in self.get_graph_key(*not_neighbor_of_any)]) + ) if operator == TraceCoverageMatrix.OR_OPERATOR: - if type_filter or result_filter or neighbor_every_filter or neighbor_any_filter: + if any(filters): result.append((node, data)) elif operator == TraceCoverageMatrix.AND_OPERATOR: - if type_filter and result_filter and neighbor_every_filter and neighbor_any_filter: + if all(filters): result.append((node, data)) else: raise ValueError(f'unsupported operator: {operator}') return result + @lru_cache(maxsize=10000) def get_graph_key(self, *items: Union[CodeElement, str, int]): keys = [] for item in items: @@ -229,4 +235,100 @@ def load_from_file(cls, file_path, granularity='binary'): coverage.key_mapping[test_name] = test_name coverage.name_mapping[test_name] = test_name + networkx.write_graphml(coverage.graph, 'coverage.dump.graphml') + return coverage + + def calculate_spectrum_metrics(self): + all_pass, all_failed = set(), set() + + for test, result in self.get_tests(): + if result == 'FAIL': + all_failed.add(test) + elif result == 'PASS': + all_pass.add(test) + else: + assert False + + total_pass, total_fail = len(all_pass), len(all_failed) + + for ce, data in self.get_code_elements(): + ep, ef = 0, 0 + + for node in self.graph.neighbors(ce): + data = self.graph.nodes[node] + + assert data['type'] == 'test' + + if data['result'] == 'FAIL': + ef += 1 + elif data['result'] == 'PASS': + ep += 1 + else: + assert False + + self.graph.nodes[ce].update( + dict( + spectrum_metrics=dict( + ef=ef, + ep=ep, + nf=total_fail - ef, + np=total_pass - ep, + ) + ) + ) + + def calculate_scores(self): + formulae = [tarantula, ochiai, dstar] + + for ce, data in self.get_code_elements(): + metrics = data['spectrum_metrics'] + + result = {} + + for formula in formulae: + score = formula(**metrics) + + result[formula.__name__] = score + + self.graph.nodes[ce].update(dict(scores=result)) + + +def ochiai(ef=0, ep=0, nf=0, np=0): + try: + denominator = math.sqrt((ef + nf) * (ef + ep)) + + score = ef / denominator + except ZeroDivisionError: + score = 0.0 + + return score + + +def tarantula(ef=0, ep=0, nf=0, np=0): + totalfail = ef + nf + totalpass = ep + np + + try: + nominator = ef / totalfail + denominator = (ef / totalfail) + (ep / totalpass) + + score = nominator / denominator + except ZeroDivisionError: + score = 0.0 + + return score + + +def dstar(ef=0, ep=0, nf=0, np=0, exponent=2): + totalfail = ef + nf + + nominator = ef ** exponent + denominator = ep + (totalfail - ef) + + try: + score = nominator / denominator + except ZeroDivisionError: + score = 0.0 + + return score diff --git a/framework/experiment/experiment.py b/framework/experiment/experiment.py index 5f6a434..7fb1e83 100644 --- a/framework/experiment/experiment.py +++ b/framework/experiment/experiment.py @@ -6,7 +6,7 @@ from framework.context.context import Context from framework.context.context import Defects4JContext from framework.context.context import SIRContext -from framework.utils import rm +from framework.util.os_helpers import rm class Experiment(object): diff --git a/framework/experiment/experiment_gong.py b/framework/experiment/experiment_gong.py index 7c4b8db..d59ea49 100644 --- a/framework/experiment/experiment_gong.py +++ b/framework/experiment/experiment_gong.py @@ -1,5 +1,5 @@ from random import randint -from typing import Dict +from typing import Dict, Tuple from framework.context.code_element import CodeElement from framework.context.context import Context @@ -12,8 +12,7 @@ import tqdm -def root_likelihood(symptom, cause, context) -> float: - d = len(context.code_element_set.code_elements) +def root_likelihood(symptom, cause, context, d: int) -> float: tests = context.coverage_matrix.query( TraceCoverageMatrix.AND_OPERATOR, test_result='FAIL', @@ -51,19 +50,43 @@ def next_subject(self): def acknowledge(self, answer: Answer): if answer == Answer.CLEAN: - code_elements = self.context.code_element_set.code_elements - {self.subject} - causes: Dict[CodeElement, float] = {} - progress_bar = tqdm.tqdm(code_elements, desc='cause probability', unit='code element') - for code_element in progress_bar: - causes[code_element] = root_likelihood(self.subject, code_element, self.context) - root_cause = max(causes.items(), key=lambda item: item[1]) + print("the answer was CLEAN, propagating scores to root cause...") + root_cause = self._get_root_cause() print(f"the most probable root cause is {root_cause}") + + covering_profiles = self.context.coverage_matrix.query( + TraceCoverageMatrix.AND_OPERATOR, + type_of='test', + neighbor_of_every=(root_cause[0],) + ) + covered_by_covering_profiles = self.context.coverage_matrix.query( + TraceCoverageMatrix.AND_OPERATOR, + type_of='code_element', + neighbor_of_any=tuple(item[0] for item in covering_profiles) + ) + not_covering_profiles = self.context.coverage_matrix.query( + TraceCoverageMatrix.AND_OPERATOR, + type_of='test', + not_neighbor_of_any=(root_cause[0],) + ) + self.context.coverage_matrix.calculate_spectrum_metrics() elif answer == Answer.FAULTY: print("Doing nothing, going to stop anyway.") else: raise ValueError('Gong experiment do not use code-context.') + print("propagation finished, scores updated") - print() + def _get_root_cause(self) -> Tuple[CodeElement, float]: + code_elements = self.context.code_element_set.code_elements - {self.subject} + d = len(self.context.code_element_set.code_elements) + causes: Dict[CodeElement, float] = {} + progress_bar = tqdm.tqdm(code_elements, desc='cause probability', unit='code element') + for code_element in progress_bar: + causes[code_element] = root_likelihood(self.subject, code_element, self.context, d) + root_cause = max(causes.items(), key=lambda item: item[1]) + # 2021-09-29: PyCharm type checking is incorrect for this + # noinspection PyTypeChecker + return root_cause class Oracle(Oracle): def __init__(self, context: Context): diff --git a/framework/fl/__init__.py b/framework/fl/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/framework/fl/algorithm.py b/framework/fl/algorithm.py new file mode 100644 index 0000000..e69de29 diff --git a/framework/main.py b/framework/main.py index bee3061..eaf0934 100755 --- a/framework/main.py +++ b/framework/main.py @@ -1,4 +1,5 @@ import argparse +import sys from os.path import join as j from os import cpu_count from multiprocessing import Pool @@ -13,6 +14,7 @@ from framework.experiment.experiment_three import ExperimentThree from framework.experiment.experiment_two import ExperimentTwo + parser = argparse.ArgumentParser(description="Executes experiments.") parser.add_argument("-d", "--datadir", required=True, help="the directory where the raw coverage and results data of SIR programs can be found") diff --git a/framework/util/__init__.py b/framework/util/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/framework/utils.py b/framework/util/os_helpers.py old mode 100755 new mode 100644 similarity index 100% rename from framework/utils.py rename to framework/util/os_helpers.py diff --git a/requirements.txt b/requirements.txt index e70d6b4..d886c68 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ natsort==7.1.1 networkx==2.6.3 scipy==1.7.1 -tqdm~=4.62.3 \ No newline at end of file +tqdm~=4.62.3 +yappi~=1.3.2 \ No newline at end of file diff --git a/sir-scripts/1_coverage_results.py b/sir-scripts/1_coverage_results.py index 3687e73..1bafdf9 100755 --- a/sir-scripts/1_coverage_results.py +++ b/sir-scripts/1_coverage_results.py @@ -7,7 +7,7 @@ from natsort import natsorted from progressbar import ProgressBar -from framework.utils import cp, rm +from framework.util.os_helpers import cp, rm parser = argparse.ArgumentParser(description="Builds all versions (including the original); " "executes all test cases available in the 'runall.sh' script; " diff --git a/sir-scripts/3_source_meter.py b/sir-scripts/3_source_meter.py index 6a08963..ee31c0e 100755 --- a/sir-scripts/3_source_meter.py +++ b/sir-scripts/3_source_meter.py @@ -10,7 +10,7 @@ from natsort import natsorted -from framework.utils import cp, rm +from framework.util.os_helpers import cp, rm parser = argparse.ArgumentParser(description="Builds all versions (including the original); " "utilizes SourceMeter to analyze the structure of the program; " diff --git a/sir-scripts/5_score.py b/sir-scripts/5_score.py index cf2a3b6..9aba2c0 100755 --- a/sir-scripts/5_score.py +++ b/sir-scripts/5_score.py @@ -9,7 +9,7 @@ from natsort import natsorted from framework.context.change import ChangeMatrix -from framework.utils import rm +from framework.util.os_helpers import rm parser = argparse.ArgumentParser(description="Creates basic score data from SoDA binaries.") From e29cb949c469f360c212008512eb62070037b1d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20Balogh?= Date: Thu, 30 Sep 2021 16:01:51 +0200 Subject: [PATCH 07/10] computing relevant and irrelevant sub coverage matrices for root cause --- .gitignore | 2 + framework/context/coverage.py | 22 +++++--- framework/experiment/experiment_gong.py | 68 +++++++++++++++++-------- 3 files changed, 64 insertions(+), 28 deletions(-) diff --git a/.gitignore b/.gitignore index 6037903..7e130d2 100644 --- a/.gitignore +++ b/.gitignore @@ -103,3 +103,5 @@ ENV/ # PyCharm .idea /framework/coverage.dump.graphml +/framework/irrelevant.dump.graphml +/framework/relevant.dump.graphml diff --git a/framework/context/coverage.py b/framework/context/coverage.py index db0f522..72ba5d9 100644 --- a/framework/context/coverage.py +++ b/framework/context/coverage.py @@ -1,3 +1,5 @@ +import copy +import json import math import os import re @@ -84,7 +86,7 @@ def _add_test(self, member: tarfile.TarInfo): test_name = match.group('test') test_result = match.group('result') - self.graph.add_node(test_name, type='test', result=test_result) + self.graph.add_node(test_name, type='test', name=test_name, result=test_result) return test_name @@ -239,7 +241,13 @@ def load_from_file(cls, file_path, granularity='binary'): return coverage - def calculate_spectrum_metrics(self): + def sub(self, base_nodes: Tuple[Union[CodeElement, str, int], ...]): + sub_coverage = copy.deepcopy(self) + base_ids = self.get_graph_key(*base_nodes) + sub_coverage.graph = self.graph.subgraph(base_ids) + return sub_coverage + + def _calculate_spectrum_metrics(self): all_pass, all_failed = set(), set() for test, result in self.get_tests(): @@ -269,20 +277,22 @@ def calculate_spectrum_metrics(self): self.graph.nodes[ce].update( dict( - spectrum_metrics=dict( + spectrum_metrics=json.dumps(dict( ef=ef, ep=ep, nf=total_fail - ef, np=total_pass - ep, - ) + )) ) ) def calculate_scores(self): + self._calculate_spectrum_metrics() + formulae = [tarantula, ochiai, dstar] for ce, data in self.get_code_elements(): - metrics = data['spectrum_metrics'] + metrics = json.loads(data['spectrum_metrics']) result = {} @@ -291,7 +301,7 @@ def calculate_scores(self): result[formula.__name__] = score - self.graph.nodes[ce].update(dict(scores=result)) + self.graph.nodes[ce].update(dict(scores=json.dumps(result))) def ochiai(ef=0, ep=0, nf=0, np=0): diff --git a/framework/experiment/experiment_gong.py b/framework/experiment/experiment_gong.py index d59ea49..f8f9d33 100644 --- a/framework/experiment/experiment_gong.py +++ b/framework/experiment/experiment_gong.py @@ -1,6 +1,8 @@ from random import randint from typing import Dict, Tuple +import networkx + from framework.context.code_element import CodeElement from framework.context.context import Context from framework.context.coverage import TraceCoverageMatrix @@ -15,19 +17,19 @@ def root_likelihood(symptom, cause, context, d: int) -> float: tests = context.coverage_matrix.query( TraceCoverageMatrix.AND_OPERATOR, - test_result='FAIL', - type_of='test', - neighbor_of_every=(symptom, cause) + test_result="FAIL", + type_of="test", + neighbor_of_every=(symptom, cause), ) p = 0 for t in tests: t_name = t[0] s_marks = context.coverage_matrix.query( TraceCoverageMatrix.AND_OPERATOR, - type_of='code_element', - neighbor_of_every=(t_name,) + type_of="code_element", + neighbor_of_every=(t_name,), ) - p += d/len(s_marks) + p += d / len(s_marks) return p @@ -38,14 +40,15 @@ class ExperimentGong(Experiment): """ class Questioner(Questioner): - def __init__(self, context: Context): self.context = context self.subject = None def next_subject(self): - self.subject = sorted(self.context.code_element_set.code_elements, - key=lambda ce: (-ce.score, ce.name))[0] + self.subject = sorted( + self.context.code_element_set.code_elements, + key=lambda ce: (-ce.score, ce.name), + )[0] return self.subject def acknowledge(self, answer: Answer): @@ -56,33 +59,54 @@ def acknowledge(self, answer: Answer): covering_profiles = self.context.coverage_matrix.query( TraceCoverageMatrix.AND_OPERATOR, - type_of='test', - neighbor_of_every=(root_cause[0],) - ) - covered_by_covering_profiles = self.context.coverage_matrix.query( - TraceCoverageMatrix.AND_OPERATOR, - type_of='code_element', - neighbor_of_any=tuple(item[0] for item in covering_profiles) + type_of="test", + neighbor_of_every=(root_cause[0],), ) + relevant_sub_matrix = self._get_sub_matrix(covering_profiles) + relevant_sub_matrix.calculate_scores() + networkx.write_graphml(relevant_sub_matrix.graph, 'relevant.dump.graphml') + not_covering_profiles = self.context.coverage_matrix.query( TraceCoverageMatrix.AND_OPERATOR, - type_of='test', - not_neighbor_of_any=(root_cause[0],) + type_of="test", + not_neighbor_of_any=(root_cause[0],), ) - self.context.coverage_matrix.calculate_spectrum_metrics() + irrelevant_sub_matrix = self._get_sub_matrix(not_covering_profiles) + irrelevant_sub_matrix.calculate_scores() + networkx.write_graphml(irrelevant_sub_matrix.graph, 'irrelevant.dump.graphml') + + raise NotImplementedError() elif answer == Answer.FAULTY: print("Doing nothing, going to stop anyway.") else: - raise ValueError('Gong experiment do not use code-context.') + raise ValueError("Gong experiment do not use code-context.") print("propagation finished, scores updated") + def _get_sub_matrix(self, profiles): + covered_by_profiles = self.context.coverage_matrix.query( + TraceCoverageMatrix.AND_OPERATOR, + type_of="code_element", + neighbor_of_any=tuple(item[0] for item in profiles), + ) + relevant_nodes = set( + item[0] for item in profiles + ) | set( + item[0] for item in covered_by_profiles + ) + relevant_sub_matrix = self.context.coverage_matrix.sub(tuple(relevant_nodes)) + return relevant_sub_matrix + def _get_root_cause(self) -> Tuple[CodeElement, float]: code_elements = self.context.code_element_set.code_elements - {self.subject} d = len(self.context.code_element_set.code_elements) causes: Dict[CodeElement, float] = {} - progress_bar = tqdm.tqdm(code_elements, desc='cause probability', unit='code element') + progress_bar = tqdm.tqdm( + code_elements, desc="cause probability", unit="code element" + ) for code_element in progress_bar: - causes[code_element] = root_likelihood(self.subject, code_element, self.context, d) + causes[code_element] = root_likelihood( + self.subject, code_element, self.context, d + ) root_cause = max(causes.items(), key=lambda item: item[1]) # 2021-09-29: PyCharm type checking is incorrect for this # noinspection PyTypeChecker From 962341b5571a7eb5f8d7e355de38bc53a61132be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gerg=C5=91=20Balogh?= Date: Mon, 4 Oct 2021 16:55:32 +0200 Subject: [PATCH 08/10] computing relevant and irrelevant scores and adjusting the scores of the root cause --- framework/context/coverage.py | 9 +++++++++ framework/experiment/experiment_gong.py | 16 +++++++++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/framework/context/coverage.py b/framework/context/coverage.py index 72ba5d9..86cf303 100644 --- a/framework/context/coverage.py +++ b/framework/context/coverage.py @@ -199,6 +199,15 @@ def query( return result + def extract_score(self, name: str) -> float: + try: + return json.loads( + [item for item in self.get_code_elements() if item[1]['name'] == name][0][1]['scores'] + )[CodeElement.SCORE_TYPE] + except: + print("something went wrong assuming 0 score") + return 0.0 + @lru_cache(maxsize=10000) def get_graph_key(self, *items: Union[CodeElement, str, int]): keys = [] diff --git a/framework/experiment/experiment_gong.py b/framework/experiment/experiment_gong.py index f8f9d33..34afc6e 100644 --- a/framework/experiment/experiment_gong.py +++ b/framework/experiment/experiment_gong.py @@ -1,5 +1,6 @@ +import json from random import randint -from typing import Dict, Tuple +from typing import Dict, Tuple, Optional import networkx @@ -42,7 +43,7 @@ class ExperimentGong(Experiment): class Questioner(Questioner): def __init__(self, context: Context): self.context = context - self.subject = None + self.subject: Optional[CodeElement] = None def next_subject(self): self.subject = sorted( @@ -75,7 +76,16 @@ def acknowledge(self, answer: Answer): irrelevant_sub_matrix.calculate_scores() networkx.write_graphml(irrelevant_sub_matrix.graph, 'irrelevant.dump.graphml') - raise NotImplementedError() + print(f"old: {root_cause[0]}") + original_score_of_inspected = self.subject.score + original_score_of_root_cause = root_cause[0].score + print("compute relevant score") + relevant_score_of_inspected = relevant_sub_matrix.extract_score(self.subject.name) + print("compute irrelevant score") + irrelevant_score_of_inspected = irrelevant_sub_matrix.extract_score(self.subject.name) + adjustment_weight = relevant_score_of_inspected - irrelevant_score_of_inspected + root_cause[0].score = original_score_of_root_cause + adjustment_weight * original_score_of_inspected + print(f"new: {root_cause[0]}") elif answer == Answer.FAULTY: print("Doing nothing, going to stop anyway.") else: From 80cf3918b1f8affd4a8cd3d627afe1d452ed2a9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ferenc=20Horv=C3=A1th?= Date: Mon, 18 Oct 2021 21:03:22 +0200 Subject: [PATCH 09/10] Fixed an issue of investigated elements being suggested for reinvestigation --- framework/context/coverage.py | 2 +- framework/experiment/experiment_gong.py | 32 ++++++++++++++++++------- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/framework/context/coverage.py b/framework/context/coverage.py index 86cf303..86bff73 100644 --- a/framework/context/coverage.py +++ b/framework/context/coverage.py @@ -246,7 +246,7 @@ def load_from_file(cls, file_path, granularity='binary'): coverage.key_mapping[test_name] = test_name coverage.name_mapping[test_name] = test_name - networkx.write_graphml(coverage.graph, 'coverage.dump.graphml') + # networkx.write_graphml(coverage.graph, 'coverage.dump.graphml') return coverage diff --git a/framework/experiment/experiment_gong.py b/framework/experiment/experiment_gong.py index 34afc6e..366ef07 100644 --- a/framework/experiment/experiment_gong.py +++ b/framework/experiment/experiment_gong.py @@ -1,6 +1,6 @@ import json from random import randint -from typing import Dict, Tuple, Optional +from typing import Dict, Tuple, Optional, Set import networkx @@ -15,7 +15,14 @@ import tqdm -def root_likelihood(symptom, cause, context, d: int) -> float: +def root_likelihood( + symptom: CodeElement, + cause: CodeElement, + inspected: Set[CodeElement], + context: Context, + d: int +) -> float: + name_of_inspected = [ce.name for ce in inspected] tests = context.coverage_matrix.query( TraceCoverageMatrix.AND_OPERATOR, test_result="FAIL", @@ -30,7 +37,8 @@ def root_likelihood(symptom, cause, context, d: int) -> float: type_of="code_element", neighbor_of_every=(t_name,), ) - p += d / len(s_marks) + not_inspected_s = [item for item in s_marks if item[1]['name'] not in name_of_inspected] + p += d / len(not_inspected_s) return p @@ -43,7 +51,9 @@ class ExperimentGong(Experiment): class Questioner(Questioner): def __init__(self, context: Context): self.context = context + self.size_of_D = len(self.context.code_element_set.code_elements) self.subject: Optional[CodeElement] = None + self.inspected: Set[CodeElement] = set() def next_subject(self): self.subject = sorted( @@ -53,6 +63,8 @@ def next_subject(self): return self.subject def acknowledge(self, answer: Answer): + self.inspected.add(self.subject) + if answer == Answer.CLEAN: print("the answer was CLEAN, propagating scores to root cause...") root_cause = self._get_root_cause() @@ -65,7 +77,7 @@ def acknowledge(self, answer: Answer): ) relevant_sub_matrix = self._get_sub_matrix(covering_profiles) relevant_sub_matrix.calculate_scores() - networkx.write_graphml(relevant_sub_matrix.graph, 'relevant.dump.graphml') + # networkx.write_graphml(relevant_sub_matrix.graph, 'relevant.dump.graphml') not_covering_profiles = self.context.coverage_matrix.query( TraceCoverageMatrix.AND_OPERATOR, @@ -74,7 +86,7 @@ def acknowledge(self, answer: Answer): ) irrelevant_sub_matrix = self._get_sub_matrix(not_covering_profiles) irrelevant_sub_matrix.calculate_scores() - networkx.write_graphml(irrelevant_sub_matrix.graph, 'irrelevant.dump.graphml') + # networkx.write_graphml(irrelevant_sub_matrix.graph, 'irrelevant.dump.graphml') print(f"old: {root_cause[0]}") original_score_of_inspected = self.subject.score @@ -83,6 +95,7 @@ def acknowledge(self, answer: Answer): relevant_score_of_inspected = relevant_sub_matrix.extract_score(self.subject.name) print("compute irrelevant score") irrelevant_score_of_inspected = irrelevant_sub_matrix.extract_score(self.subject.name) + print(relevant_score_of_inspected, irrelevant_score_of_inspected) adjustment_weight = relevant_score_of_inspected - irrelevant_score_of_inspected root_cause[0].score = original_score_of_root_cause + adjustment_weight * original_score_of_inspected print(f"new: {root_cause[0]}") @@ -92,6 +105,8 @@ def acknowledge(self, answer: Answer): raise ValueError("Gong experiment do not use code-context.") print("propagation finished, scores updated") + self.subject.score = 0 + def _get_sub_matrix(self, profiles): covered_by_profiles = self.context.coverage_matrix.query( TraceCoverageMatrix.AND_OPERATOR, @@ -107,17 +122,16 @@ def _get_sub_matrix(self, profiles): return relevant_sub_matrix def _get_root_cause(self) -> Tuple[CodeElement, float]: - code_elements = self.context.code_element_set.code_elements - {self.subject} - d = len(self.context.code_element_set.code_elements) + code_elements = self.context.code_element_set.code_elements - self.inspected causes: Dict[CodeElement, float] = {} progress_bar = tqdm.tqdm( code_elements, desc="cause probability", unit="code element" ) for code_element in progress_bar: causes[code_element] = root_likelihood( - self.subject, code_element, self.context, d + self.subject, code_element, self.inspected, self.context, self.size_of_D ) - root_cause = max(causes.items(), key=lambda item: item[1]) + root_cause = min(causes.items(), key=lambda item: (-item[1], item[0].name)) # 2021-09-29: PyCharm type checking is incorrect for this # noinspection PyTypeChecker return root_cause From 525936a3cb3cae6aedfbf7a96fd181e59b67584f Mon Sep 17 00:00:00 2001 From: fhorvath Date: Wed, 20 Oct 2021 11:27:42 +0200 Subject: [PATCH 10/10] Added project selector argument --- framework/experiment/experiment.py | 5 ++++- framework/main.py | 10 +++++++++- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/framework/experiment/experiment.py b/framework/experiment/experiment.py index 7fb1e83..5f0bdd8 100644 --- a/framework/experiment/experiment.py +++ b/framework/experiment/experiment.py @@ -20,7 +20,7 @@ def __init__(self, name: str, context_type: Context): def configure(self): raise NotImplementedError - def run(self, data_dir: str, output_dir: str, knowledge=100, confidence=100): + def run(self, data_dir: str, output_dir: str, knowledge=100, confidence=100, projects=[]): output_dir = j(output_dir, self.name) rm(output_dir, "*") @@ -35,6 +35,9 @@ def run(self, data_dir: str, output_dir: str, knowledge=100, confidence=100): } print(program) + if projects and program['name'] not in projects: + continue + function_csv = j(data_dir, program["name"], "%(name)s.%(version)s.function.csv" % program) change_csv = j(data_dir, "changeset", "changeset.json") coverage_csv = j(data_dir, "coverage", program["name"], "%(version)sb" % program, "binary", "%(name)s-%(version)sb-binary.tar.gz" % program) diff --git a/framework/main.py b/framework/main.py index eaf0934..de96ff8 100755 --- a/framework/main.py +++ b/framework/main.py @@ -21,6 +21,8 @@ parser.add_argument("-o", "--outdir", required=True, help="output directory") parser.add_argument("-s", "--score", choices=["dstar", "ochiai", "tarantula"], default="tarantula", help="short name of the algorithm that is used to calculate the score") +parser.add_argument("-p", "--projects", nargs="+", required=False, default=[], + help="Name of projects on which the experiments should run") parser.add_argument("-t", "--threads", default=cpu_count() - 1, help="number of usable threads") # introduced because concurrence breaks debugging traces, and makes debugging exceedingly difficult parser.add_argument("-nc", "--no_concurrence", default=False, action="store_true", @@ -47,7 +49,13 @@ def jobs(): def do(job): e = ExperimentGong("expgong", Defects4JContext) - e.run(args.datadir, j(args.outdir, args.score, str(job[0]), str(job[1]), str(job[2])), knowledge=job[0], confidence=job[2]) + e.run( + args.datadir, + j(args.outdir, args.score, str(job[0]), str(job[1]), str(job[2])), + knowledge=job[0], + confidence=job[2], + projects=args.projects, + ) if __name__ == '__main__':