diff --git a/README.md b/README.md index 65179ca..914f97c 100644 --- a/README.md +++ b/README.md @@ -169,6 +169,8 @@ docker run -v$PWD/code:/code -v$PWD/reporting:/app/reporting punksecurity/saist | `--web` | Launch a local web server | | `--disable-tools` | Disable tool use during file analysis to reduce LLM token usage | | `--disable-caching` | Disable finding caching during file analysis | +| `--skip-line-length-check` | Skip checking files for a maximum line length | +| `--max-line-length` | Maximum allowed line length, files with lines longer than this value will be skipped | | `--cache-folder` | Change the default cache folder | | `--csv` | Output findings to `findings.csv` | | `--pdf` | Output findings to PDF report (`report.pdf`) | diff --git a/saist/main.py b/saist/main.py index 92f458c..954e22b 100755 --- a/saist/main.py +++ b/saist/main.py @@ -21,7 +21,7 @@ from scm import BaseScmAdapter from scm.adapters.git import GitAdapter from util.git import parse_unified_diff -from util.filtering import should_process +from util.filtering import filename_included, file_exceeds_line_length_limit from util.prompts import prompts from scm.adapters.github import Github from scm import Scm @@ -198,9 +198,19 @@ async def main(): for f in changed_files: filename = f["filename"] patch_text = f.get("patch", "") - if not patch_text or not should_process(filename): + if not patch_text: + logging.debug(f"Skipped file {filename} as it contains no patch text") + continue + + if not filename_included(filename): + logging.debug(f"Skipped file {filename} as it is not included in rules") continue + if not args.skip_line_length_check: + if file_exceeds_line_length_limit(file_content=await scm.read_file_contents(filename), patch_text=patch_text, max_line_length=args.max_line_length): + logging.debug(f"Skipped file {filename} as it contains lines that exceed the maximum line length ({args.max_line_length})") + continue + line_map, new_lines_text = parse_unified_diff(patch_text) 
def file_exceeds_line_length_limit(file_content: str, patch_text: str, max_line_length: int = 1000) -> bool:
    """
    Checks whether any line of the file, or of its patch, is longer than max_line_length.

    Args:
        file_content: Full text of the file being considered. A falsy value
            (None or an empty string) is treated as having no lines.
        patch_text: Unified diff text for the same file. A falsy value is
            treated as having no lines.
        max_line_length: Longest line length, in characters, that is still accepted.

    Returns:
        True if at least one line exceeds max_line_length, False if every
        line is within the limit.
    """
    if any(len(line) > max_line_length for line in (file_content or "").splitlines()):
        return True

    for line in (patch_text or "").splitlines():
        # Added/removed/context lines carry a one-character diff marker that is
        # not part of the file's own line, so it must not count towards the
        # length. Hunk headers and other lines are measured as-is.
        if line.startswith(("+", "-", " ")):
            content_length = len(line) - 1
        else:
            content_length = len(line)
        if content_length > max_line_length:
            return True

    return False


def filename_included(filepath: str) -> bool:
    """
    Returns True if the file is included in includelist and not explicitly ignored in ignorelist.
    """
    logger.debug("filename_included: %s", filepath)

    # Check include/ignore rules: the path must match an include pattern and
    # must not match any ignore pattern.
    if not pattern_match(filepath, include_patterns):
        return False
    if pattern_match(filepath, ignore_patterns):
        return False

    return True