diff --git a/.gitignore b/.gitignore index 7616e48..7e130d2 100644 --- a/.gitignore +++ b/.gitignore @@ -102,3 +102,6 @@ ENV/ # PyCharm .idea +/framework/coverage.dump.graphml +/framework/irrelevant.dump.graphml +/framework/relevant.dump.graphml diff --git a/framework/__init__.py b/framework/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/framework/context/coverage.py b/framework/context/coverage.py index 4bb8373..86bff73 100644 --- a/framework/context/coverage.py +++ b/framework/context/coverage.py @@ -1,12 +1,17 @@ +import copy +import json +import math import os import re import tarfile from functools import lru_cache -from typing import List +from typing import List, Optional, Union, Tuple +import networkx import networkx as nx import trc2chains +from framework.context.code_element import CodeElement class CoverageMatrix(object): @@ -70,7 +75,8 @@ class TraceCoverageMatrix(object): def __init__(self, granularity: str): self.granularity = granularity self.graph = nx.Graph() - self.mapping = {} + self.key_mapping = {} + self.name_mapping = {} def _add_test(self, member: tarfile.TarInfo): file_name = os.path.basename(member.name) @@ -80,12 +86,12 @@ def _add_test(self, member: tarfile.TarInfo): test_name = match.group('test') test_result = match.group('result') - self.graph.add_node(test_name, type='test', result=test_result) + self.graph.add_node(test_name, type='test', name=test_name, result=test_result) return test_name def _add_code_element(self, code_element: str): - self.graph.add_node(code_element, type='code_element') + self.graph.add_node(code_element, type='code_element', name=self.key_mapping[code_element]) def _add_coverage(self, test: str, code_element: str, **attr): self._add_code_element(code_element) @@ -126,7 +132,8 @@ def create_mapping(self, archive: tarfile.TarFile, member: tarfile.TarInfo): id = int(parts[0]) name = parts[1] - self.mapping[id] = name + self.key_mapping[id] = name + self.name_mapping[name] = id 
OR_OPERATOR = 'or'
AND_OPERATOR = 'and'


@lru_cache(maxsize=10000)
def query(
    self,
    operator: str,
    test_result: Optional[str] = None,
    type_of: Optional[str] = None,
    neighbor_of_every: Tuple[Union["CodeElement", str, int], ...] = (),
    not_neighbor_of_every: Tuple[Union["CodeElement", str, int], ...] = (),
    neighbor_of_any: Tuple[Union["CodeElement", str, int], ...] = (),
    not_neighbor_of_any: Tuple[Union["CodeElement", str, int], ...] = ()
):
    """Select graph nodes by attribute and adjacency filters.

    Six filters are evaluated for every node of ``self.graph``; a filter
    that was left at its default counts as satisfied.

    * ``type_of`` / ``test_result`` -- the node's ``type`` / ``result``
      attribute must equal the given value.
    * ``neighbor_of_every`` -- the node is adjacent to all listed items.
    * ``neighbor_of_any`` -- the node is adjacent to at least one item.
    * ``not_neighbor_of_every`` -- the node is adjacent to none of them.
    * ``not_neighbor_of_any`` -- at least one item is not adjacent.

    :param operator: ``AND_OPERATOR`` keeps nodes passing all filters,
        ``OR_OPERATOR`` keeps nodes passing at least one. Because unset
        filters count as satisfied, ``OR_OPERATOR`` only discriminates
        when every filter is set.
    :return: list of ``(node, data)`` pairs. The list is memoised by
        ``lru_cache``, so callers must treat it as read-only.
    :raises ValueError: if ``operator`` is not one of the two constants.
    """
    # Validate up front: previously the check ran inside the node loop, so
    # an unsupported operator on an empty graph silently returned [].
    if operator == self.OR_OPERATOR:
        combine = any
    elif operator == self.AND_OPERATOR:
        combine = all
    else:
        raise ValueError(f'unsupported operator: {operator}')

    def resolve(items):
        # Translate names / code elements into graph keys once per query
        # instead of once per node.
        return self.get_graph_key(*items) if items else []

    every_keys = resolve(neighbor_of_every)
    any_keys = resolve(neighbor_of_any)
    not_every_keys = resolve(not_neighbor_of_every)
    not_any_keys = resolve(not_neighbor_of_any)

    result = []

    for node, data in self.graph.nodes(data=True):
        # A set gives O(1) adjacency checks for the four filters below.
        neighbors = set(self.graph.neighbors(node))
        filters = (
            type_of is None or data.get('type', None) == type_of,
            test_result is None or data.get('result', None) == test_result,
            all(key in neighbors for key in every_keys),
            not any_keys or any(key in neighbors for key in any_keys),
            all(key not in neighbors for key in not_every_keys),
            not not_any_keys or any(key not in neighbors for key in not_any_keys),
        )

        if combine(filters):
            result.append((node, data))

    return result
def sub(self, base_nodes: Tuple[Union["CodeElement", str, int], ...]):
    """Return a copy of this matrix restricted to ``base_nodes``.

    :param base_nodes: code elements, names or raw graph keys; they are
        translated to graph keys with ``get_graph_key``.
    :return: a deep copy of ``self`` whose ``graph`` contains only the
        requested nodes and the edges between them.
    """
    base_ids = self.get_graph_key(*base_nodes)
    sub_coverage = copy.deepcopy(self)
    # ``subgraph`` only returns a *view*: its node attribute dicts are the
    # very same objects as in ``self.graph``. Without ``.copy()`` any
    # ``nodes[...].update(...)`` performed on the sub-matrix (as
    # ``calculate_scores`` does) would leak into the full matrix and into
    # every other sub-matrix derived from it.
    sub_coverage.graph = self.graph.subgraph(base_ids).copy()
    return sub_coverage
def ochiai(ef=0, ep=0, nf=0, np=0):
    """Return the Ochiai suspiciousness ``ef / sqrt((ef + nf) * (ef + ep))``.

    ``ef``/``ep`` count failing/passing tests that cover the element and
    ``nf``/``np`` those that do not. ``np`` is unused by the formula and is
    accepted only so that every formula shares one keyword signature.
    The score is 0.0 when the denominator is zero.
    """
    try:
        return ef / math.sqrt((ef + nf) * (ef + ep))
    except ZeroDivisionError:
        return 0.0


def tarantula(ef=0, ep=0, nf=0, np=0):
    """Return the Tarantula suspiciousness of a code element.

    The score is ``fail_ratio / (fail_ratio + pass_ratio)`` where
    ``fail_ratio = ef / (ef + nf)`` and ``pass_ratio = ep / (ep + np)``.
    A ratio whose total is zero is taken as 0.0, and the score is 0.0 when
    both ratios are zero.
    """
    totalfail = ef + nf
    totalpass = ep + np

    # Evaluate each ratio on its own. Previously a spectrum without any
    # passing test made ``ep / totalpass`` raise, which zeroed the score of
    # every element -- even one executed by all failing tests.
    fail_ratio = ef / totalfail if totalfail else 0.0
    pass_ratio = ep / totalpass if totalpass else 0.0

    try:
        return fail_ratio / (fail_ratio + pass_ratio)
    except ZeroDivisionError:
        return 0.0


def dstar(ef=0, ep=0, nf=0, np=0, exponent=2):
    """Return the D* suspiciousness ``ef ** exponent / (ep + nf)``.

    ``np`` is unused by the formula. The score is 0.0 when ``ep + nf`` is
    zero.
    """
    # NOTE(review): a zero denominator with ef > 0 describes an element hit
    # by every failing test and by no passing one; scoring it 0.0 ranks it
    # last. Kept as-is because callers do arithmetic on the score.
    try:
        return ef ** exponent / (ep + nf)
    except ZeroDivisionError:
        return 0.0
def root_likelihood(
    symptom: CodeElement,
    cause: CodeElement,
    inspected: Set[CodeElement],
    context: Context,
    d: int
) -> float:
    """Estimate how likely ``cause`` is the root cause behind ``symptom``.

    Every failing test that covers both ``symptom`` and ``cause``
    contributes ``d / k``, where ``k`` is the number of code elements
    covered by that test that have not been inspected yet.

    :param symptom: the element that was just inspected.
    :param cause: the candidate root cause.
    :param inspected: elements already inspected; matched by ``name``.
    :param context: provides ``coverage_matrix``, which is queried.
    :param d: numerator of each contribution (the visible caller passes
        the number of code elements).
    :return: the summed contributions, 0.0 when no failing test covers
        both elements.
    """
    # Set instead of list: membership is tested once per covered element.
    inspected_names = {ce.name for ce in inspected}
    failing_tests = context.coverage_matrix.query(
        TraceCoverageMatrix.AND_OPERATOR,
        test_result="FAIL",
        type_of="test",
        neighbor_of_every=(symptom, cause),
    )

    p = 0.0
    for test_key, _ in failing_tests:
        covered = context.coverage_matrix.query(
            TraceCoverageMatrix.AND_OPERATOR,
            type_of="code_element",
            neighbor_of_every=(test_key,),
        )
        remaining = sum(
            1 for _, data in covered if data['name'] not in inspected_names
        )
        # A test whose covered elements are all inspected has nothing left
        # to blame; skip it instead of dividing by zero.
        if remaining:
            p += d / remaining
    return p
def ask_about(self, subject):
    """Answer whether ``subject`` is faulty, with limited knowledge.

    A subject that ``self.context.is_faulty`` reports as faulty is always
    answered with ``Answer.FAULTY``. A non-faulty subject is answered with
    ``Answer.CLEAN`` unless a random draw exceeds
    ``self.context.knowledge``, in which case the oracle wrongly answers
    ``Answer.FAULTY``.
    """
    if self.context.is_faulty(subject):
        return Answer.FAULTY

    # ``randint`` includes both bounds. Drawing from 1..100 yields exactly
    # 100 outcomes, so ``knowledge`` acts as a true percentage; 0..100 had
    # 101 outcomes and skewed every probability slightly.
    if randint(1, 100) > self.context.knowledge:
        return Answer.FAULTY

    return Answer.CLEAN
b/framework/fl/algorithm.py new file mode 100644 index 0000000..e69de29 diff --git a/framework/main.py b/framework/main.py index 1b54588..de96ff8 100755 --- a/framework/main.py +++ b/framework/main.py @@ -1,4 +1,5 @@ import argparse +import sys from os.path import join as j from os import cpu_count from multiprocessing import Pool @@ -6,19 +7,26 @@ from framework.context.code_element import CodeElement from framework.context.context import Defects4JContext from framework.context.context import SIRContext +from framework.experiment.experiment_gong import ExperimentGong from framework.experiment.experiment_one import Experiment from framework.experiment.experiment_one import ExperimentOne from framework.experiment.experiment_one import ExperimentOneB from framework.experiment.experiment_three import ExperimentThree from framework.experiment.experiment_two import ExperimentTwo + parser = argparse.ArgumentParser(description="Executes experiments.") parser.add_argument("-d", "--datadir", required=True, help="the directory where the raw coverage and results data of SIR programs can be found") parser.add_argument("-o", "--outdir", required=True, help="output directory") parser.add_argument("-s", "--score", choices=["dstar", "ochiai", "tarantula"], default="tarantula", help="short name of the algorithm that is used to calculate the score") +parser.add_argument("-p", "--projects", nargs="+", required=False, default=[], + help="Name of projects on which the experiments should run") parser.add_argument("-t", "--threads", default=cpu_count() - 1, help="number of usable threads") +# introduced because concurrence breaks debugging traces, and makes debugging exceedingly difficult +parser.add_argument("-nc", "--no_concurrence", default=False, action="store_true", + help="remove any concurrence execution") args = parser.parse_args() @@ -40,12 +48,24 @@ def jobs(): def do(job): - e = ExperimentOneB("ex1b", Defects4JContext) - e.run(args.datadir, j(args.outdir, args.score, 
if __name__ == '__main__':
    # Run every job either in-process (easier to debug) or on a worker pool.
    if args.no_concurrence:
        print("running sequentially")
        for job in jobs():
            do(job)
    else:
        print("running concurrently")
        # The --threads default is cpu_count() - 1, which is 0 on a
        # single-core machine, and Pool rejects fewer than one process.
        workers = max(1, int(args.threads))
        # The context manager terminates the workers if map() raises;
        # on success the pool is still shut down gracefully first.
        with Pool(processes=workers) as pool:
            pool.map(do, jobs())
            pool.close()
            pool.join()
rm parser = argparse.ArgumentParser(description="Builds all versions (including the original); " "utilizes SourceMeter to analyze the structure of the program; " diff --git a/sir-scripts/5_score.py b/sir-scripts/5_score.py index cf2a3b6..9aba2c0 100755 --- a/sir-scripts/5_score.py +++ b/sir-scripts/5_score.py @@ -9,7 +9,7 @@ from natsort import natsorted from framework.context.change import ChangeMatrix -from framework.utils import rm +from framework.util.os_helpers import rm parser = argparse.ArgumentParser(description="Creates basic score data from SoDA binaries.")