diff --git a/scrub/scrubme.py b/scrub/scrubme.py
index e278343..b9dcf07 100644
--- a/scrub/scrubme.py
+++ b/scrub/scrubme.py
@@ -229,6 +229,8 @@ def main(conf_file=pathlib.Path('./scrub.cfg').resolve(), clean=False, console_l
             scrub_utilities.check_artifact(tool_analysis_dir, True)

             # Parse the results files
+            logging.info('')
+            logging.info(' Parsing results...')
             parser.parse_warnings(tool_analysis_dir, scrub_conf_data)

             # Check the raw results files
diff --git a/scrub/tools/parsers/get_codesonar_warnings.py b/scrub/tools/parsers/get_codesonar_warnings.py
index 4ba7a62..b1948e0 100644
--- a/scrub/tools/parsers/get_codesonar_warnings.py
+++ b/scrub/tools/parsers/get_codesonar_warnings.py
@@ -126,12 +126,14 @@ def parse_xml_warnings(input_file, output_file, codesonar_hub, exclude_p10=False
             warning_count = warning_count + 1


-def parse_warnings(analysis_dir, tool_config_data):
+def parse_warnings(analysis_dir, tool_config_data, raw_input_file=None, parsed_output_file=None):
     """This function handles parsing of raw CodeSonar data.

     Inputs:
-        - analysis_dir: Absolute path to the raw SonarQube output file directory [string]
+        - analysis_dir: Absolute path to the raw CodeSonar output file directory [string]
         - tool_config_data: Dictionary of scrub configuration data [dict]
+        - raw_input_file: Absolute path to the raw input file [string] [optional]
+        - parsed_output_file: Absolute path to the parsed output file [string] [optional]
     """

     # Initialize variables
@@ -139,20 +141,29 @@ def parse_warnings(analysis_dir, tool_config_data):
     codesonar_hub = tool_config_data.get('codesonar_hub')
     raw_analysis_metrics_file = analysis_dir.joinpath('analysis_metrics.json')
     raw_file_metrics_file = analysis_dir.joinpath('file_metrics.json')
-    parsed_output_file = tool_config_data.get('raw_results_dir').joinpath('codesonar_raw.scrub')
     parsed_metrics_file = tool_config_data.get('scrub_analysis_dir').joinpath('codesonar_metrics.csv')

+    # Set the input file
+    if raw_input_file is None:
+        if tool_config_data.get('codesonar_results_template'):
+            raw_input_file = analysis_dir.joinpath('search.xml')
+        else:
+            raw_input_file = analysis_dir.joinpath('warning_detail_search.sarif')
+
+    # Set the output file
+    if parsed_output_file is None:
+        parsed_output_file = tool_config_data.get('raw_results_dir').joinpath('codesonar_raw.scrub')
+
     # Print a status message
     logging.info('\t>> Executing command: get_codesonar_warnings.parse_warnings(%s, %s)', analysis_dir,
                  parsed_output_file)
     logging.info('\t>> From directory: %s', str(pathlib.Path().absolute()))

-    # Parse the SARIF results
-    if tool_config_data.get('codesonar_results_template'):
-        raw_input_file = analysis_dir.joinpath('search.xml')[0]
+    # Parse the results
+    if raw_input_file.suffix == '.xml':
         parse_xml_warnings(raw_input_file, parsed_output_file, codesonar_hub)
     else:
-        raw_input_file = analysis_dir.joinpath('warning_detail_search.sarif')
+        # Parse the SARIF file
         raw_warnings = translate_results.parse_sarif(raw_input_file, source_dir)

         # Create the SCRUB output file
@@ -161,7 +172,3 @@
     # Parse the metrics files
     parse_metrics.parse_codesonar_metrics(raw_analysis_metrics_file, raw_file_metrics_file, parsed_metrics_file,
                                           source_dir)
-    # if raw_metrics_file.exists():
-    #     parse_metrics.parse_codesonar_metrics(raw_metrics_file, parsed_metrics_file, source_dir)
-    # else:
-    #     logging.info('\tWARNING: Metrics file not found. Check log for more details.')
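With the refactor above, every SCRUB parser keeps its old two-argument behaviour but can also be pointed at an explicit input and output file. A minimal sketch of both calling styles, assuming a populated scrub.cfg in the working directory; the analysis and output paths below are hypothetical and not taken from the patch:

import pathlib

from scrub import utils
from scrub.tools.parsers import get_codesonar_warnings

# Hypothetical working locations; adjust to the local SCRUB output tree
conf_data = utils.scrub_utilities.parse_common_configs(pathlib.Path('scrub.cfg'), None)
analysis_dir = pathlib.Path('.scrub/codesonar_analysis')

# Default style: input and output paths are derived from analysis_dir and the config data
get_codesonar_warnings.parse_warnings(analysis_dir, conf_data)

# Explicit style: parse a specific raw file into a specific SCRUB output file
get_codesonar_warnings.parse_warnings(analysis_dir, conf_data,
                                      raw_input_file=analysis_dir.joinpath('search.xml'),
                                      parsed_output_file=pathlib.Path('.scrub/raw_results/codesonar_raw.scrub'))

The same optional-argument shape is applied to the gbuild, gcc, javac, pylint, and sonarqube parsers in the hunks that follow.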
diff --git a/scrub/tools/parsers/get_coverity_warnings.py b/scrub/tools/parsers/get_coverity_warnings.py
index b9af094..96a46a4 100644
--- a/scrub/tools/parsers/get_coverity_warnings.py
+++ b/scrub/tools/parsers/get_coverity_warnings.py
@@ -153,8 +153,8 @@ def parse_warnings(analysis_dir, tool_config_data):
     coverity_findings = translate_results.parse_sarif(analysis_dir.joinpath('coverity.sarif'),
                                                       tool_config_data.get('source_dir'))

-    # Parse the metrics file
-    if (cc_threshold >= 0) and (coverity_metrics_file.exists()):
+    # Parse the metrics file, if necessary
+    if cc_threshold >= 0:
         coverity_findings = coverity_findings + parse_cc(coverity_metrics_file, cc_threshold)

     # Create the output file
diff --git a/scrub/tools/parsers/get_gbuild_warnings.py b/scrub/tools/parsers/get_gbuild_warnings.py
index 6b3cc3a..43474ea 100644
--- a/scrub/tools/parsers/get_gbuild_warnings.py
+++ b/scrub/tools/parsers/get_gbuild_warnings.py
@@ -187,17 +187,23 @@ def parse_doublecheck_warnings(raw_input_file, parsed_output_file):
     translate_results.create_scrub_output_file(raw_warnings, parsed_output_file)


-def parse_warnings(analysis_dir, tool_config_data):
+def parse_warnings(analysis_dir, tool_config_data, raw_input_file=None, parsed_output_file=None):
     """This function parses the raw gbuild compiler warnings into the SCRUB format.

     Inputs:
-        - raw_input_file: Absolute path to the raw gbuild compiler log containing warnings [string]
-        - parsed_output_file: Absolute path to the file where the parsed warnings will be stored [string]
+        - analysis_dir: Absolute path to the raw gbuild output file directory [string]
+        - tool_config_data: Dictionary of scrub configuration data [dict]
+        - raw_input_file: Absolute path to the raw input file [string] [optional]
+        - parsed_output_file: Absolute path to the parsed output file [string] [optional]
     """

-    # Initialize variables
-    raw_input_file = analysis_dir.joinpath('gbuild_build.log')
-    parsed_output_file = tool_config_data.get('raw_results_dir').joinpath('gbuild_compiler_raw.scrub')
+    # Set the input file
+    if raw_input_file is None:
+        raw_input_file = analysis_dir.joinpath('gbuild_build.log')
+
+    # Set the output file
+    if parsed_output_file is None:
+        parsed_output_file = tool_config_data.get('raw_results_dir').joinpath('gbuild_compiler_raw.scrub')

     # Print a status message
     logging.info('')
diff --git a/scrub/tools/parsers/get_gcc_warnings.py b/scrub/tools/parsers/get_gcc_warnings.py
index 130dd55..71ab9cc 100644
--- a/scrub/tools/parsers/get_gcc_warnings.py
+++ b/scrub/tools/parsers/get_gcc_warnings.py
@@ -6,7 +6,7 @@
 ID_PREFIX = 'gcc'


-def parse_warnings(analysis_dir, tool_config_data):
+def parse_warnings(analysis_dir, tool_config_data, raw_input_file=None, parsed_output_file=None):
     """This function parses the raw GCC compiler warnings into the SCRUB format.

     Inputs:
@@ -24,8 +24,14 @@ def parse_warnings(analysis_dir, tool_config_data):
     warning_message = []
     parsing = False
     description = False
-    raw_input_file = analysis_dir.joinpath('gcc_build.log')
-    parsed_output_file = tool_config_data.get('raw_results_dir').joinpath('gcc_compiler_raw.scrub')
+
+    # Set the input file
+    if raw_input_file is None:
+        raw_input_file = analysis_dir.joinpath('gcc_build.log')
+
+    # Set the output file
+    if parsed_output_file is None:
+        parsed_output_file = tool_config_data.get('raw_results_dir').joinpath('gcc_compiler_raw.scrub')

     # Print a status message
     logging.info('')
diff --git a/scrub/tools/parsers/get_javac_warnings.py b/scrub/tools/parsers/get_javac_warnings.py
index 4f0b3b5..0d30e00 100644
--- a/scrub/tools/parsers/get_javac_warnings.py
+++ b/scrub/tools/parsers/get_javac_warnings.py
@@ -7,18 +7,26 @@
 ID_PREFIX = 'javac'


-def parse_warnings(analysis_dir, tool_config_data):
+def parse_warnings(analysis_dir, tool_config_data, raw_input_file=None, parsed_output_file=None):
     """This function parses the raw javac compiler warnings into the SCRUB format.

     Inputs:
-        - raw_input_file: Absolute path to the raw javac compiler log containing warnings [string]
-        - parsed_output_file: Absolute path to the file where the parsed warnings will be stored [string]
+        - analysis_dir: Absolute path to the raw javac output file directory [string]
+        - tool_config_data: Dictionary of scrub configuration data [dict]
+        - raw_input_file: Absolute path to the raw input file [string] [optional]
+        - parsed_output_file: Absolute path to the parsed output file [string] [optional]
     """

     # Initialize variables
     warning_count = 1
-    raw_input_file = analysis_dir.joinpath('javac_build.log')
-    parsed_output_file = tool_config_data.get('raw_results_dir').joinpath('javac_compiler_raw.scrub')
+
+    # Set the input file
+    if raw_input_file is None:
+        raw_input_file = analysis_dir.joinpath('javac_build.log')
+
+    # Set the output file
+    if parsed_output_file is None:
+        parsed_output_file = tool_config_data.get('raw_results_dir').joinpath('javac_compiler_raw.scrub')

     # Print a status message
     logging.info('')
diff --git a/scrub/tools/parsers/get_pylint_warnings.py b/scrub/tools/parsers/get_pylint_warnings.py
index 8ba7bac..4bf3ea7 100644
--- a/scrub/tools/parsers/get_pylint_warnings.py
+++ b/scrub/tools/parsers/get_pylint_warnings.py
@@ -6,7 +6,7 @@
 ID_PREFIX = 'pylint'


-def parse_warnings(analysis_dir, tool_config_data):
+def parse_warnings(analysis_dir, tool_config_data, raw_input_file=None, parsed_output_file=None):
     """This function parses the raw PyLint warnings into the SCRUB format.

     Inputs:
@@ -16,8 +16,14 @@ def parse_warnings(analysis_dir, tool_config_data):

     # Initialize the variables
     warning_count = 1
-    raw_input_file = analysis_dir.joinpath('pylint_output.json')
-    parsed_output_file = tool_config_data.get('raw_results_dir').joinpath('pylint_compiler_raw.scrub')
+
+    # Set the input file
+    if raw_input_file is None:
+        raw_input_file = analysis_dir.joinpath('pylint_output.json')
+
+    # Set the output file
+    if parsed_output_file is None:
+        parsed_output_file = tool_config_data.get('raw_results_dir').joinpath('pylint_compiler_raw.scrub')

     # Read in the input data
     with open(raw_input_file, 'r') as input_fh:
diff --git a/scrub/tools/parsers/get_sonarqube_warnings.py b/scrub/tools/parsers/get_sonarqube_warnings.py
index ba6065e..24d3ee5 100644
--- a/scrub/tools/parsers/get_sonarqube_warnings.py
+++ b/scrub/tools/parsers/get_sonarqube_warnings.py
@@ -5,7 +5,7 @@
 ID_PREFIX = 'sonarqube'


-def parse_warnings(analysis_dir, tool_config_data):
+def parse_warnings(analysis_dir, tool_config_data, parsed_output_file=None):
     """This function parses the raw SonarQube warnings into the SCRUB format.

     Inputs:
@@ -18,11 +18,15 @@ def parse_warnings(analysis_dir, tool_config_data):
     raw_warnings = []
     sonarqube_url = tool_config_data.get('sonarqube_server')
     source_root = tool_config_data.get('source_dir')
-    parsed_output_file = tool_config_data.get('raw_results_dir').joinpath('sonarqube_raw.scrub')
     metrics_output_file = tool_config_data.get('scrub_analysis_dir').joinpath('sonarqube_metrics.csv')

+    # Set the output file
+    if parsed_output_file is None:
+        parsed_output_file = tool_config_data.get('raw_results_dir').joinpath('sonarqube_raw.scrub')
+
     # Find all the raw findings results files in the directory
-    findings_results_files = analysis_dir.glob('*.json')
+    findings_results_files = (list(analysis_dir.glob('sonarqube_issues*.json')) +
+                              list(analysis_dir.glob('sonarqube_hotspots*.json')))

     # Iterate through every issue results file
     for raw_findings_file in findings_results_files:
@@ -107,5 +111,5 @@ def parse_warnings(analysis_dir, tool_config_data):
     # Create the SCRUB output file
     translate_results.create_scrub_output_file(raw_warnings, parsed_output_file)

-    # Parse the metrics file, if it exists
+    # Parse the metrics data
     parse_metrics.parse(analysis_dir, metrics_output_file, source_root, 'sonarqube')
diff --git a/scrub/tools/parsers/parse_metrics.py b/scrub/tools/parsers/parse_metrics.py
index 5b7fd60..f747247 100644
--- a/scrub/tools/parsers/parse_metrics.py
+++ b/scrub/tools/parsers/parse_metrics.py
@@ -98,6 +98,7 @@ def parse_codesonar_metrics(raw_analysis_metrics_file, raw_file_metrics_file, pa
     """

     # Initialize variables
+    cleaned_metrics_data = {'project_metrics': {}}
     metrics_list = {'Top-level file instances': 'Number of Files',
                     'User-defined functions': 'Number of Functions',
                     'Total Lines': 'Total Lines',
@@ -115,56 +116,62 @@ def parse_codesonar_metrics(raw_analysis_metrics_file, raw_file_metrics_file, pa
                     'Mixed Lines': 'In-Line Comments',
                     'Include file instances': 'Number of Includes'}

-    # Read in the analysis-level metrics file
-    with open(raw_analysis_metrics_file, 'r') as input_fh:
-        analysis_metrics_data = json.load(input_fh)
-
-    # Read in the file-level metrics file
-    with open(raw_file_metrics_file, 'r') as input_fh:
-        file_metrics_data = json.load(input_fh)
-
-    # Update the data structure
-    cleaned_metrics_data = {'project_metrics': {}}
-    for metric in analysis_metrics_data.get('metrics'):
-        if metric.get('granularity').lower() == 'analysis':
-            cleaned_metrics_data['project_metrics'][metric.get('description')] = (metric.get('rows')[0]
-                                                                                  .get('metricValue'))
-
-    # Update the data structure
-    file_list = []
-    file_path = None
-    for metric in file_metrics_data.get('metrics'):
-        if metric.get('granularity').lower() == 'file':
-            for file_metric in metric.get('rows'):
-                # Find the file name in the source tree
-                file_name = file_metric.get('file')
-                file_search = source_root.rglob(file_name)
-                if len(list(file_search)) == 1:
-                    file_path = str(next(source_root.rglob(file_name)).relative_to(source_root))
-                else:
-                    print('ERROR: Could not resolve file path {}'.format(file_name))
-
-                # Add the file to the list
-                if file_path not in cleaned_metrics_data.keys():
-                    cleaned_metrics_data[file_path] = {}
-                    file_list.append(file_path)
-
-                # Add the metric to the dictionary
-                cleaned_metrics_data[file_path][metric.get('description')] = file_metric.get('metricValue')
-                cleaned_metrics_data[file_path]['Top-level file instances'] = 1
-
-    # Calculate comment density
-    for item in cleaned_metrics_data.keys():
-        comment_density = round(int(cleaned_metrics_data[item]['Comment Lines']) /
-                                int(cleaned_metrics_data[item]['Code Lines']) * 100, 2)
-        cleaned_metrics_data[item]['Comment Density'] = comment_density
-
-    # Check to make sure we have data
-    if cleaned_metrics_data:
-        # Generate the output file
-        create_output_file(cleaned_metrics_data, parsed_output_file, metrics_list, file_list)
+    # Parse the metrics files if they exist
+    if raw_analysis_metrics_file.exists() and raw_file_metrics_file.exists():
+        # Read in the analysis-level metrics file
+        with open(raw_analysis_metrics_file, 'r') as input_fh:
+            analysis_metrics_data = json.load(input_fh)
+
+        # Read in the file-level metrics file
+        with open(raw_file_metrics_file, 'r') as input_fh:
+            file_metrics_data = json.load(input_fh)
+
+        # Update the data structure
+        for metric in analysis_metrics_data.get('metrics'):
+            if metric.get('granularity').lower() == 'analysis':
+                cleaned_metrics_data['project_metrics'][metric.get('description')] = (metric.get('rows')[0]
+                                                                                      .get('metricValue'))
+
+        # Update the data structure
+        file_list = []
+        file_path = None
+        for metric in file_metrics_data.get('metrics'):
+            if metric.get('granularity').lower() == 'file':
+                for file_metric in metric.get('rows'):
+                    # Find the file name in the source tree
+                    file_name = file_metric.get('file')
+                    file_search = source_root.rglob(file_name)
+                    if len(list(file_search)) == 1:
+                        file_path = str(next(source_root.rglob(file_name)).relative_to(source_root))
+                    else:
+                        print('ERROR: Could not resolve file path {}'.format(file_name))
+
+                    # Add the file to the list
+                    if file_path not in cleaned_metrics_data.keys():
+                        cleaned_metrics_data[file_path] = {}
+                        file_list.append(file_path)
+
+                    # Add the metric to the dictionary
+                    cleaned_metrics_data[file_path][metric.get('description')] = file_metric.get('metricValue')
+                    cleaned_metrics_data[file_path]['Top-level file instances'] = 1
+
+        # Calculate comment density
+        for item in cleaned_metrics_data.keys():
+            if int(cleaned_metrics_data[item]['Code Lines']) != 0:
+                comment_density = round(int(cleaned_metrics_data[item]['Comment Lines']) /
+                                        int(cleaned_metrics_data[item]['Code Lines']) * 100, 2)
+            else:
+                comment_density = 'N/A'
+            cleaned_metrics_data[item]['Comment Density'] = comment_density
+
+        # Check to make sure we have data
+        if cleaned_metrics_data:
+            # Generate the output file
+            create_output_file(cleaned_metrics_data, parsed_output_file, metrics_list, file_list)
+        else:
+            logging.warning('\tCould not parse metrics data. Check log for more information.')
     else:
-        logging.warning('\tCould not generate metrics file. Check log for more information.')
+        logging.warning('\tSome metrics data is missing. Check log for more information.')

     return cleaned_metrics_data
@@ -182,6 +189,8 @@ def parse_sonarqube_metrics(metrics_directory, parsed_output_file):

     # Initialize variables
     cleaned_metrics_data = {}
+    project_metrics_file = metrics_directory.joinpath('sonarqube_metrics_project.json')
+    metrics_files = list(metrics_directory.glob('sonarqube_metrics_file_*.json'))
     metrics_list = {'files': 'Number of Files',
                     'functions': 'Number of Functions',
                     'lines': 'Total Lines',
@@ -199,10 +208,8 @@ def parse_sonarqube_metrics(metrics_directory, parsed_output_file):
                     'sqale_index': 'SQALE Index',
                     'duplicated_lines_density': 'Duplication Density'}

-    # Read in the project metrics file, if it exists
-    project_metrics_file = metrics_directory.joinpath('sonarqube_metrics_project.json')
-
-    if project_metrics_file.exists():
+    # Read in the metrics data, if it exists
+    if project_metrics_file.exists() and metrics_files:
         with open(project_metrics_file, 'r') as input_fh:
             project_metrics_data = json.load(input_fh)

@@ -216,50 +223,45 @@ def parse_sonarqube_metrics(metrics_directory, parsed_output_file):
         comment_density = round(int(cleaned_metrics_data['project_metrics']['comment_lines']) /
                                 int(cleaned_metrics_data['project_metrics']['ncloc']) * 100, 2)
         cleaned_metrics_data['project_metrics']['comment_density'] = comment_density
-    else:
-        logging.warning('\tProject-level metrics data is missing. Check log for more details.')

-    # Find all the file level metrics
-    metrics_files = list(metrics_directory.glob('sonarqube_metrics_file_*.json'))
+        # Parse the file level metrics
+        file_list = []
+        if len(metrics_files) > 0:
+            for metrics_file in metrics_files:
+                with open(metrics_file, 'r') as input_fh:
+                    file_metrics_data = json.load(input_fh)
+
+                # Parse every component
+                for source_file in file_metrics_data.get('components'):
+                    for file_metric in source_file.get('measures'):
+                        if source_file.get('path') not in cleaned_metrics_data.keys():
+                            cleaned_metrics_data[source_file.get('path')] = {}
+                            file_list.append(source_file.get('path'))
+                        cleaned_metrics_data[source_file.get('path')][file_metric.get('metric')] = float(file_metric
+                                                                                                          .get('value'))
+
+                    # Add in the comment density information
+                    if cleaned_metrics_data[source_file['path']]['ncloc'] > 0:
+                        comment_density = round(int(cleaned_metrics_data[source_file['path']]['comment_lines']) /
+                                                int(cleaned_metrics_data[source_file['path']]['ncloc']) * 100, 2)
+                    else:
+                        comment_density = 'N/A'
+                    cleaned_metrics_data[source_file.get('path')]['comment_density'] = comment_density
+
+                    # Update complexity measurement if necessary
+                    if 'complexity' not in cleaned_metrics_data[source_file.get('path')].keys():
+                        cleaned_metrics_data[source_file.get('path')]['complexity'] = 0
+                    # Update functions count measurement if necessary
+                    if 'functions' not in cleaned_metrics_data[source_file.get('path')].keys():
+                        cleaned_metrics_data[source_file.get('path')]['functions'] = 0
-    # Parse the file level metrics
-    file_list = []
-    if len(metrics_files) > 0:
-        for metrics_file in metrics_files:
-            with open(metrics_file, 'r') as input_fh:
-                file_metrics_data = json.load(input_fh)
-
-            # Parse every component
-            for source_file in file_metrics_data.get('components'):
-                for file_metric in source_file.get('measures'):
-                    if source_file.get('path') not in cleaned_metrics_data.keys():
-                        cleaned_metrics_data[source_file.get('path')] = {}
-                        file_list.append(source_file.get('path'))
-                    cleaned_metrics_data[source_file.get('path')][file_metric.get('metric')] = float(file_metric
-                                                                                                      .get('value'))
-
-                # Add in the comment density information
-                if cleaned_metrics_data[source_file['path']]['ncloc'] > 0:
-                    comment_density = round(int(cleaned_metrics_data[source_file['path']]['comment_lines']) /
-                                            int(cleaned_metrics_data[source_file['path']]['ncloc']) * 100, 2)
-                else:
-                    comment_density = 'N/A'
-                cleaned_metrics_data[source_file.get('path')]['comment_density'] = comment_density
-
-                # Update complexity measurement if necessary
-                if 'complexity' not in cleaned_metrics_data[source_file.get('path')].keys():
-                    cleaned_metrics_data[source_file.get('path')]['complexity'] = 0
-                # Update functions count measurement if necessary
-                if 'functions' not in cleaned_metrics_data[source_file.get('path')].keys():
-                    cleaned_metrics_data[source_file.get('path')]['functions'] = 0
-    else:
-        logging.warning('\tFile-level metrics data is missing. Check log for more details.')
-
-    # Generate the output file
-    if cleaned_metrics_data:
-        create_output_file(cleaned_metrics_data, parsed_output_file, metrics_list, file_list)
+        # Generate the output file
+        if cleaned_metrics_data:
+            create_output_file(cleaned_metrics_data, parsed_output_file, metrics_list, file_list)
+        else:
+            logging.warning('\tCould not parse metrics data. Check log for more details.')
     else:
-        logging.warning('\tCould not generate metrics output. Check log for more details.')
+        logging.warning('\tSome metrics data is missing. Check log for more information.')

     return cleaned_metrics_data
@@ -276,27 +278,30 @@ def parse_coverity_metrics(metrics_directory, parsed_output_file):
     """

     # Initialize variables
+    project_metrics_file = metrics_directory.joinpath('output/ANALYSIS.metrics.xml')
+    cleaned_metrics_data = {'project_metrics': {}}
     metrics_list = {'files-analyzed': 'Number of Files',
                     'function-metrics-count': 'Number of Functions',
                     'code-lines': 'Lines of Code',
                     'comment-lines': 'Number of Comments'}

     # Read in the project metrics file
-    project_metrics_file = metrics_directory.joinpath('output/ANALYSIS.metrics.xml')
-    project_metrics_tree = xml.etree.ElementTree.parse(project_metrics_file)
+    if project_metrics_file.exists():
+        project_metrics_tree = xml.etree.ElementTree.parse(project_metrics_file)

-    # Parse the project level metrics
-    cleaned_metrics_data = {'project_metrics': {}}
-    for raw_metric_data in project_metrics_tree.findall('metrics/metric'):
-        metric_name = raw_metric_data.find('name').text
-        metric_value = raw_metric_data.find('value').text
+        # Parse the project level metrics
+        for raw_metric_data in project_metrics_tree.findall('metrics/metric'):
+            metric_name = raw_metric_data.find('name').text
+            metric_value = raw_metric_data.find('value').text

-        # Gather only the metrics of interest
-        if metric_name in metrics_list:
-            cleaned_metrics_data['project_metrics'][metric_name] = metric_value
+            # Gather only the metrics of interest
+            if metric_name in metrics_list:
+                cleaned_metrics_data['project_metrics'][metric_name] = metric_value

-    # Generate the output file
-    create_output_file(cleaned_metrics_data, parsed_output_file, metrics_list, [])
+        # Generate the output file
+        create_output_file(cleaned_metrics_data, parsed_output_file, metrics_list, [])
+    else:
+        logging.warning('\tSome metrics data is missing. Check log for more information.')

     return cleaned_metrics_data
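One behavioural detail worth noting from the metrics changes above: comment density is now only divided out when the line count is non-zero. A standalone sketch of that guard, using a hypothetical per-file metrics dictionary rather than real CodeSonar or SonarQube output:

# Hypothetical per-file metrics; real values come from analysis_metrics.json / file_metrics.json
file_metrics = {'Comment Lines': 12, 'Code Lines': 0}

if int(file_metrics['Code Lines']) != 0:
    comment_density = round(int(file_metrics['Comment Lines']) / int(file_metrics['Code Lines']) * 100, 2)
else:
    # A file reporting zero code lines would otherwise raise ZeroDivisionError
    comment_density = 'N/A'

file_metrics['Comment Density'] = comment_density
print(file_metrics)  # {'Comment Lines': 12, 'Code Lines': 0, 'Comment Density': 'N/A'}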
diff --git a/scrub/tools/parsers/translate_results.py b/scrub/tools/parsers/translate_results.py
index 626d5d6..b1ffa67 100644
--- a/scrub/tools/parsers/translate_results.py
+++ b/scrub/tools/parsers/translate_results.py
@@ -233,6 +233,7 @@ def format_sarif_for_upload(input_file, output_file, source_root, upload_format)

     # Initialize variables
     formatted_results = []
+    tool_name = input_file.stem

     # Import the SARIF results
     unformatted_results = parse_sarif(input_file, source_root)
@@ -248,7 +249,7 @@ def format_sarif_for_upload(input_file, output_file, source_root, upload_format)
             warning['description'] = [warning['description'][0]]
             formatted_results.append(warning)

-        create_sarif_output_file(formatted_results, '2.1.0', output_file, source_root)
+        create_sarif_output_file(formatted_results, '2.1.0', output_file, source_root, tool_name)
     elif upload_format == 'codesonar':
         # shutil.copyfile(input_file, output_file)
         for warning in unformatted_results:
@@ -256,7 +257,7 @@ def format_sarif_for_upload(input_file, output_file, source_root, upload_format)
             warning['tool'] = 'external-' + warning['tool']
             warning['query'] = warning['tool'].title() + ' ' + warning['query']
             formatted_results.append(warning)
-        create_sarif_output_file(formatted_results, '2.1.0', output_file, source_root)
+        create_sarif_output_file(formatted_results, '2.1.0', output_file, source_root, tool_name)


 def parse_sarif(sarif_filename, source_root):
@@ -376,7 +377,7 @@ def parse_sarif(sarif_filename, source_root):
     return results


-def create_sarif_output_file(results_list, sarif_version, output_file, source_root):
+def create_sarif_output_file(results_list, sarif_version, output_file, source_root, tool_name):
     """This function creates a SARIF formatted output file.

     Inputs:
@@ -384,6 +385,7 @@ def create_sarif_output_file(results_list, sarif_version, output_file, source_ro
         - sarif_version:
         - output_file:
         - source_root: Absolute path of source root directory [string]
+        - tool_name: Name of scanning tool [string]

     Returns:
         - output_file is created at the specified location
@@ -397,6 +399,12 @@ def create_sarif_output_file(results_list, sarif_version, output_file, source_ro
         '$schema': 'https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json',
         'runs': [
             {
+                'tool': {
+                    'driver': {
+                        'name': tool_name
+                    },
+                    'rules': []
+                },
                 'results': []
             }
         ]
@@ -455,13 +463,7 @@ def create_sarif_output_file(results_list, sarif_version, output_file, source_ro
                     'text': rule
                 }
             })
-
-        sarif_output['runs'][0]['tool'] = {
-            'driver': {
-                'name': results_list[0]['tool'],
-                'rules': sarif_rules
-            }
-        }
+        sarif_output['runs'][0]['tool']['rules'] = sarif_rules
         result_item['locations'] = [{
             'physicalLocation': {
                 'artifactLocation': {
@@ -540,6 +542,7 @@ def perform_translation(input_file, output_file, source_root, output_format):
     # Initialize the variables
     exit_code = 1
     parsed_results = []
+    tool_name = input_file.stem

     try:
         # Parse the input file
@@ -562,7 +565,7 @@ def perform_translation(input_file, output_file, source_root, output_format):
             sarif_version = output_format.strip('sarifv')

             # Generate the output file
-            create_sarif_output_file(parsed_results, sarif_version, output_file, source_root)
+            create_sarif_output_file(parsed_results, sarif_version, output_file, source_root, tool_name)

         else:
             # TODO: This should generate an exception
diff --git a/scrub/tools/templates/coverity.template b/scrub/tools/templates/coverity.template
index 84c8c13..b428ac9 100644
--- a/scrub/tools/templates/coverity.template
+++ b/scrub/tools/templates/coverity.template
@@ -33,6 +33,9 @@ fi
 if [[ "${{SOURCE_LANG}}" =~ "python" ]]; then
     find ${{SOURCE_DIR}} -iname "*.py" >> "$file_list"
 fi
+if [[ "${{SOURCE_LANG}}" =~ "py" ]]; then
+    find ${{SOURCE_DIR}} -iname "*.py" >> "$file_list"
+fi
 if [[ "${{SOURCE_LANG}}" =~ "ruby" ]]; then
     find ${{SOURCE_DIR}} -iname "*.rb" >> "$file_list"
 fi
diff --git a/scrub/tools/templates/pylint.template b/scrub/tools/templates/pylint.template
index 71d9dc4..ab1cfd8 100644
--- a/scrub/tools/templates/pylint.template
+++ b/scrub/tools/templates/pylint.template
@@ -4,4 +4,4 @@ pylint --version


 # Perform Pylint analysis
-find ${{SOURCE_DIR}} -type f -name '*.py' | xargs pylint ${{PYLINT_FLAGS}} --output ${{TOOL_ANALYSIS_DIR}}/pylint_output.json --output-format json
+pylint ${{SOURCE_DIR}} ${{PYLINT_FLAGS}} --exit-zero --output ${{TOOL_ANALYSIS_DIR}}/pylint_output.json --output-format json
\ No newline at end of file
diff --git a/scrub/tools/templates/sonarqube.template b/scrub/tools/templates/sonarqube.template
index 27bd55f..bb25f66 100644
--- a/scrub/tools/templates/sonarqube.template
+++ b/scrub/tools/templates/sonarqube.template
@@ -53,6 +53,9 @@ fi
 if [[ "${{SOURCE_LANG}}" =~ "python" ]]; then
     file_extension_filters="$file_extension_filters,**/*.py"
 fi
+if [[ "${{SOURCE_LANG}}" =~ "py" ]]; then
+    file_extension_filters="$file_extension_filters,**/*.py"
+fi
 if [[ "${{SOURCE_LANG}}" =~ "rpg" ]]; then
     file_extension_filters="$file_extension_filters,**/*.rpg,**/*.rpgle,**/*.sqlrpgle,**/*.RPG,**/*.RPGLE,**/*.SQLRPGLE"
 fi
@@ -141,7 +144,7 @@ fi
 PAGE_SIZE=500

 # Get the first page
 RESULTS_FILE=${{TOOL_ANALYSIS_DIR}}/sonarqube_issues_1.json
"${{SONARQUBE_SERVER}}/api/issues/search?ps=$PAGE_SIZE&componentKeys=${{SONARQUBE_PROJECT}}&p=1&${{SONARQUBE_CURL_FLAGS}}" -o $RESULTS_FILE +curl -u ${{SONARQUBE_TOKEN}}: "${{SONARQUBE_SERVER}}/api/issues/search?ps=$PAGE_SIZE&componentKeys=${{SONARQUBE_PROJECT}}&p=1&languages=${{SOURCE_LANG}}${{SONARQUBE_CURL_FLAGS}}" -o $RESULTS_FILE # Get the number of remaining pages TOTAL_RESULTS=$(grep -E '[0-9]+' -m 1 -o -a $RESULTS_FILE | sed -n 1p) @@ -154,23 +157,15 @@ fi for ((CURRENT_PAGE=2; CURRENT_PAGE <= TOTAL_PAGES; CURRENT_PAGE++)); do # Get the page - RESULTS_FILE=${{TOOL_ANALYSIS_DIR}}/sonarqube_issues_$PAGE.json - curl -u ${{SONARQUBE_TOKEN}}: "${{SONARQUBE_SERVER}}/api/issues/search?ps=500&componentKeys=${{SONARQUBE_PROJECT}}&languages=${{SOURCE_LANG}}&p=$PAGE&${{SONARQUBE_CURL_FLAGS}}" -o $RESULTS_FILE - # Check to see if the file is empty + RESULTS_FILE=${{TOOL_ANALYSIS_DIR}}/sonarqube_issues_$CURRENT_PAGE.json + curl -u ${{SONARQUBE_TOKEN}}: "${{SONARQUBE_SERVER}}/api/issues/search?ps=500&componentKeys=${{SONARQUBE_PROJECT}}&languages=${{SOURCE_LANG}}&p=$CURRENT_PAGE&${{SONARQUBE_CURL_FLAGS}}" -o $RESULTS_FILE + + # Check the file contents if [ ! -s "$RESULTS_FILE" ]; then exit 1 - fi - # Check the contents, verify file is not empty, and make sure the max page hasn't been reached - if grep -q "Can return only the first 10000 results" $RESULTS_FILE; then - echo "WARNING: Not all results have been retrieved." - MORE_RESULTS=false - elif [ $PAGE -gt 20 ]; then - MORE_RESULTS=false - elif grep -q "\"issues\":\[\]" $RESULTS_FILE; then - rm -f $RESULTS_FILE - MORE_RESULTS=false - else - PAGE=$((PAGE+1)) + elif grep -q "Can return only the first 10000 results" $RESULTS_FILE; then + rm -f $RESULTS_FILE + break fi done @@ -192,25 +187,43 @@ do # Get the page RESULTS_FILE=${{TOOL_ANALYSIS_DIR}}/sonarqube_hotspots_$CURRENT_PAGE.json curl -u ${{SONARQUBE_TOKEN}}: "${{SONARQUBE_SERVER}}/api/hotspots/search?ps=$PAGE_SIZE&projectKey=${{SONARQUBE_PROJECT}}&p=$CURRENT_PAGE&${{SONARQUBE_CURL_FLAGS}}" -o $RESULTS_FILE + + # Check the file contents + if [ ! 
-s "$RESULTS_FILE" ]; then + exit 1 + elif grep -q "Can return only the first 10000 results" $RESULTS_FILE; then + rm -f $RESULTS_FILE + break + fi done # Get project metrics from the SonarQube server curl -u ${{SONARQUBE_TOKEN}}: "${{SONARQUBE_SERVER}}/api/measures/component_tree?component=${{SONARQUBE_PROJECT}}&ps=500&qualifiers=TRK&metricKeys=files,functions,lines,ncloc,comment_lines,complexity,cognitive_complexity,violations,vulnerabilities,security_hotspots,coverage,line_coverage,branch_coverage,sqale_index,duplicated_lines_density" -o "${{TOOL_ANALYSIS_DIR}}/sonarqube_metrics_project.json" -# Get the file level metrics from the SonarQube server -PAGE=1 -MORE_RESULTS=true -while $MORE_RESULTS; do - METRICS_FILE="${{TOOL_ANALYSIS_DIR}}/sonarqube_metrics_file_$PAGE.json" - curl -u ${{SONARQUBE_TOKEN}}: "${{SONARQUBE_SERVER}}/api/measures/component_tree?component=${{SONARQUBE_PROJECT}}&ps=500&p=$PAGE&qualifiers=FIL&strategy=all&metricKeys=files,functions,lines,ncloc,comment_lines,complexity,cognitive_complexity,violations,vulnerabilities,security_hotspots,coverage,line_coverage,branch_coverage,sqale_index,duplicated_lines_density" -o "${{TOOL_ANALYSIS_DIR}}/sonarqube_metrics_file_$PAGE.json" - - # Check if there are more results - if grep -q "\"components\":\[\]" $METRICS_FILE; then - rm -f $METRICS_FILE - MORE_RESULTS=false - elif [ $PAGE -gt 20 ]; then - MORE_RESULTS=false - else - PAGE=$((PAGE+1)) +# Retrieve the file metrics from the SonarQube server +# Get the first page +METRICS_FILE="${{TOOL_ANALYSIS_DIR}}/sonarqube_metrics_file_1.json" +curl -u ${{SONARQUBE_TOKEN}}: "${{SONARQUBE_SERVER}}/api/measures/component_tree?component=${{SONARQUBE_PROJECT}}&ps=500&p=1&qualifiers=FIL&strategy=all&metricKeys=files,functions,lines,ncloc,comment_lines,complexity,cognitive_complexity,violations,vulnerabilities,security_hotspots,coverage,line_coverage,branch_coverage,sqale_index,duplicated_lines_density" -o $METRICS_FILE + +# Get the number of remaining pages +TOTAL_RESULTS=$(( $(grep -E '[0-9]+' -m 1 -o -a $METRICS_FILE | sed -n 3p) )) +TOTAL_PAGES=$(( ( TOTAL_RESULTS / PAGE_SIZE ) + ( TOTAL_RESULTS % PAGE_SIZE > 0 ) )) +if (( TOTAL_PAGES > 20 )); then + TOTAL_PAGES=20 +fi + +# Get the rest of the metrics pages +for ((CURRENT_PAGE=2; CURRENT_PAGE <= TOTAL_PAGES; CURRENT_PAGE++)); +do + # Get the page + METRICS_FILE="${{TOOL_ANALYSIS_DIR}}/sonarqube_metrics_file_$CURRENT_PAGE.json" + curl -u ${{SONARQUBE_TOKEN}}: "${{SONARQUBE_SERVER}}/api/measures/component_tree?component=${{SONARQUBE_PROJECT}}&ps=500&p=$CURRENT_PAGE&qualifiers=FIL&strategy=all&metricKeys=files,functions,lines,ncloc,comment_lines,complexity,cognitive_complexity,violations,vulnerabilities,security_hotspots,coverage,line_coverage,branch_coverage,sqale_index,duplicated_lines_density" -o $METRICS_FILE + + # Check the file contents + if [ ! 
-s "$METRICS_FILE" ]; then + exit 1 + elif grep -q "Can return only the first 10000 results" $METRICS_FILE; then + rm -f $METRICS_FILE + break fi done \ No newline at end of file diff --git a/scrub/utils/scrub_utilities.py b/scrub/utils/scrub_utilities.py index 7de6fcc..4dad0a1 100644 --- a/scrub/utils/scrub_utilities.py +++ b/scrub/utils/scrub_utilities.py @@ -372,8 +372,8 @@ def parse_common_configs(user_conf_file, raw_override_values): source_langs[i] = 'c,cpp' elif source_lang == 'j': source_langs[i] = 'java' - elif source_lang == 'p': - source_langs[i] = 'python' + elif source_lang == 'p' or source_lang == 'python': + source_langs[i] = 'py' elif source_lang == 'js': source_langs[i] = 'javascript' scrub_conf_data.update({'source_lang': ','.join(source_langs)}) diff --git a/setup.cfg b/setup.cfg index 7335b02..4a91592 100644 --- a/setup.cfg +++ b/setup.cfg @@ -24,7 +24,7 @@ version = 3.0 [options] include_package_data = True -install_requires = +install_requires = sarif-tools packages = find: setup_requires = setuptools diff --git a/tests/test_integration.py b/tests/test_integration.py index 17236bf..f2933c4 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -4,6 +4,7 @@ import pytest import pathlib import traceback +import shutil from scrub import scrub_cli from scrub.tools.parsers import get_codesonar_warnings from scrub.tools.parsers import get_coverity_warnings @@ -13,6 +14,7 @@ from scrub.tools.parsers import get_pylint_warnings from scrub.tools.parsers import get_sonarqube_warnings from scrub.tools.parsers import translate_results +from scrub import utils # Initialize variables @@ -38,29 +40,36 @@ multi_lang_testcase = pathlib.Path(__file__).parent.joinpath('integration_tests/multi_lang_testcase') diff_testcase = pathlib.Path(__file__).parent.joinpath('integration_tests/diff_testcase') raw_files = pathlib.Path(__file__).parent.joinpath('integration_tests/parsers').glob('*') +parser_config_data = utils.scrub_utilities.parse_common_configs(c_testcase.joinpath('scrub.cfg'), None) @pytest.mark.parametrize("raw_file", raw_files) def test_parser(raw_file, capsys): output_file = raw_file.parent.joinpath(raw_file.stem + '_output.scrub') if 'codesonar' in raw_file.stem and raw_file.suffix == '.xml': - get_codesonar_warnings.parse_warnings(raw_file, output_file, os.getenv('CODESONAR_HUB')) - elif 'codesonar' in raw_file.stem and raw_file.suffix == '.sarif': + get_codesonar_warnings.parse_warnings(pathlib.Path(__file__).parent.joinpath('integration_tests/parsers'), + parser_config_data, raw_file, output_file) + elif raw_file.suffix == '.sarif': translate_results.perform_translation(raw_file, output_file, c_testcase, 'scrub') elif 'codeql' in raw_file.stem: translate_results.perform_translation(raw_file, output_file, pathlib.Path(c_testcase), 'scrub') elif 'coverity' in raw_file.stem: - get_coverity_warnings.parse_json(raw_file, output_file) + warnings = get_coverity_warnings.parse_json(raw_file) + translate_results.create_scrub_output_file(warnings, output_file) elif 'gbuild' in raw_file.stem: - get_gbuild_warnings.parse_warnings(raw_file, output_file) + get_gbuild_warnings.parse_warnings(pathlib.Path(__file__).parent.joinpath('integration_tests/parsers'), + parser_config_data, raw_file, output_file) elif 'gcc' in raw_file.stem: - get_gcc_warnings.parse_warnings(raw_file, output_file) + get_gcc_warnings.parse_warnings(pathlib.Path(__file__).parent.joinpath('integration_tests/parsers'), + parser_config_data, raw_file, output_file) elif 'java' in raw_file.stem: - 
+        get_javac_warnings.parse_warnings(pathlib.Path(__file__).parent.joinpath('integration_tests/parsers'),
+                                          parser_config_data, raw_file, output_file)
     elif 'pylint' in raw_file.stem:
-        get_pylint_warnings.parse_warnings(raw_file, output_file)
+        get_pylint_warnings.parse_warnings(pathlib.Path(__file__).parent.joinpath('integration_tests/parsers'),
+                                           parser_config_data, raw_file, output_file)
     elif 'sonarqube' in raw_file.stem:
-        get_sonarqube_warnings.parse_warnings(raw_file.parent, output_file, pathlib.Path(c_testcase), os.getenv('SONARQUBE_SERVER'))
+        get_sonarqube_warnings.parse_warnings(raw_file, parser_config_data, output_file)

     # Verify output
     assert output_file.exists()
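The translate_results changes earlier in this patch seed each generated SARIF run with a tool name taken from the input file's stem instead of the first warning's tool field. A minimal sketch of the round trip, assuming a hypothetical SCRUB-format results file and the 'sarifv2.1.0' output format string:

import json
import pathlib

from scrub.tools.parsers import translate_results

# Hypothetical paths; any SCRUB-formatted results file will do
input_file = pathlib.Path('.scrub/raw_results/gcc_compiler_raw.scrub')
output_file = pathlib.Path('.scrub/raw_results/gcc_compiler_raw.sarif')
source_root = pathlib.Path('.')

# perform_translation now forwards input_file.stem to create_sarif_output_file
translate_results.perform_translation(input_file, output_file, source_root, 'sarifv2.1.0')

# The driver name in the generated SARIF matches the input file's stem
sarif_data = json.loads(output_file.read_text())
print(sarif_data['runs'][0]['tool']['driver']['name'])  # gcc_compiler_raw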