Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/saist.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.x"
python-version: "3.13"

- name: Install Dependencies
run: |
Expand Down
26 changes: 19 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -95,17 +95,23 @@ saist respects **file include/exclude rules** via two optional files in the root

| File | Purpose |
| ----------------- | ---------------------------------- |
| `saist.include` | List of glob patterns to **include** |
| `saist.ignore` | List of glob patterns to **ignore** |
| `saist.include` | List of `.gitignore`-style patterns to **include** |
| `saist.ignore` | List of `.gitignore`-style patterns to **ignore** |

- Patterns follow `glob` syntax (similar to `.gitignore`).
- Patterns follow `.gitignore` syntax.
- If **`saist.include`** does not exist, default extensions are used (e.g., `.py`, `.js`, `.java`, `.go`, etc).
- Examples:
- `**/*.py` includes all Python files
- `src/**/*.ts` includes TypeScript files inside `src`
- `tests/**` in `saist.ignore` will ignore the entire tests folder
- `build/` will ignore the entire build folder
- `*.log` will ignore all log files

> Note: saist currently does basic glob pattern matching. More advanced `.gitignore`-style support is coming soon!
You can also provide include/exclude patterns using the command-line arguments `--include` and `--exclude`.
- Patterns provided via command-line arguments are appended to any patterns loaded from the rule files.
- Examples:
- `--include '**/*.py' --include '**/*.ts'` includes all Python and TypeScript files
- `--include '**' --exclude '*.log'` includes all files except those ending in `.log`
- `--exclude 'node_modules/'` excludes the entire `node_modules` directory
---

### 📝 Example
Expand All @@ -119,15 +125,16 @@ src/**/*.js

`saist.ignore`
```
tests/**
docs/**
tests/
docs/
```

This setup will:
- Only scan `.py`, `.ts`, and specific `.js` files
- Ignore anything under `tests/` and `docs/`
---


## 📄 PDF report generation

saist allows you to generate PDF reports summarizing your findings, making it easier to share insights with your team.
Expand Down Expand Up @@ -169,6 +176,11 @@ docker run -v$PWD/code:/code -v$PWD/reporting:/app/reporting punksecurity/saist
| `--web` | Launch a local web server |
| `--disable-tools` | Disable tool use during file analysis to reduce LLM token usage |
| `--disable-caching` | Disable finding caching during file analysis |
| `--skip-line-length-check` | Skip checking files for a maximum line length |
| `--max-line-length` | Maximum allowed line length, files with lines longer than this value will be skipped |
| `-i, --include` | Pattern to explicitly include |
| `-e, --exclude` | Pattern to explicitly ignore |
| `--dry-run` | Exit after parsing configuration and collecting files, does not perform any analysis, useful for validating rules |
| `--cache-folder` | Change the default cache folder |
| `--csv` | Output findings to `findings.csv` |
| `--pdf` | Output findings to PDF report (`report.pdf`) |
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ aiofiles==24.1.0
rich==14.0.0
flask==3.1.1
ollama==0.5.1
gitignore-parser==0.1.13
2 changes: 1 addition & 1 deletion saist.ignore
Original file line number Diff line number Diff line change
@@ -1 +1 @@
.github/**
.github/
22 changes: 20 additions & 2 deletions saist/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
from scm import BaseScmAdapter
from scm.adapters.git import GitAdapter
from util.git import parse_unified_diff
from util.filtering import should_process
from util.filtering import FilterRules
from util.prompts import prompts
from scm.adapters.github import Github
from scm import Scm
Expand Down Expand Up @@ -195,12 +195,23 @@ async def main():
file_new_lines_text = {}
app_files = []

filter_rules = FilterRules(args.include, args.exclude)
for f in changed_files:
filename = f["filename"]
patch_text = f.get("patch", "")
if not patch_text or not should_process(filename):
if not patch_text:
logging.debug(f"Skipped file {filename} as it contains no patch text")
continue

if not filter_rules.filename_included(filename):
logging.debug(f"Skipped file {filename} as it is not included in rules")
continue

if not args.skip_line_length_check:
if filter_rules.file_exceeds_line_length_limit(file_content=await scm.read_file_contents(filename), patch_text=patch_text, max_line_length=args.max_line_length):
logging.debug(f"Skipped file {filename} as it contains lines that exceed the maximum line length ({args.max_line_length})")
continue

line_map, new_lines_text = parse_unified_diff(patch_text)
file_line_maps[filename] = line_map
file_new_lines_text[filename] = new_lines_text
Expand All @@ -211,6 +222,13 @@ async def main():
return
print(f"✅ Prepared {len(app_files)} app files for analysis.\n")

app_filenames = list((filename for filename,_ in app_files))
logging.debug(f"Files to process: {app_filenames}")

if args.dry_run:
print("⚠️ --dry-run flag passed, exiting without analyzing files.")
exit(0)

# 3) Analyze each file in parallel
print("🔍 Analyzing files for security issues...")
max_workers = min(args.llm_rate_limit, len(app_files))
Expand Down
25 changes: 25 additions & 0 deletions saist/util/argparsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
from os import linesep, environ, cpu_count
import sys
from shutil import which
from dotenv import load_dotenv

load_dotenv(".env")

runtime = environ.get("SAIST_COMMAND", f"{sys.argv[0]}")

Expand Down Expand Up @@ -223,6 +225,29 @@ def __call__(self, parser, namespace, values, option_string=None):
envvar="SAIST_PROJECT_NAME", action=EnvDefault, required=False, default=None
)

parser.add_argument(
"--skip-line-length-check", help = "Skip checking files for a maximum line length",
action='store_true', required=False
)

parser.add_argument(
"--max-line-length", type=int, help = "Maximum allowed line length, files with lines longer than this value will be skipped",
required=False, default=1000
)

parser.add_argument(
'-i', '--include', action='append', help = "Pattern or filename to explicitly include, overrides saist.ignore", nargs=1, required=False
)

parser.add_argument(
'-e', '--exclude', action='append', help = "Pattern or filename to explicitly ignore, overrides saist.include", nargs=1, required=False
)

parser.add_argument(
"--dry-run", help = "Exit after parsing configuration and collecting files, does not perform any analysis. Useful for validating rules.",
action='store_true', required=False
)

parser.add_argument(
"-v",
"--verbose",
Expand Down
113 changes: 72 additions & 41 deletions saist/util/filtering.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import logging
import os
import fnmatch
from pathlib import Path
from gitignore_parser import parse_gitignore_str

#TODO: Make this recognise more gitignore patterns, like simple directory matching with /
#TODO: Document in README
#TODO: Switch to verbose logging only

DEFAULT_EXTENSIONS = [
".c", ".cpp", ".h", ".hpp",
Expand All @@ -16,43 +14,76 @@

logger = logging.getLogger(__name__)

def load_patterns(filename):
"""
Loads glob-style patterns from a file.
Returns a list of patterns.
"""
if not os.path.exists(filename):
return []

logger.debug(f"load_patterns: Found {filename}, processing...")
with open(filename, "r") as f:
lines = f.readlines()

return [
line.strip()
for line in lines
if line.strip() and not line.strip().startswith("#")
]

def pattern_match(filepath, patterns):
normalized_path = filepath.replace("\\", "/")
return any(fnmatch.fnmatch(normalized_path, pattern) for pattern in patterns)

def should_process(filepath):
"""
Returns True if the file should be processed (included AND not ignored).
"""
logger.debug(f"should_process: {filepath}")
if not pattern_match(filepath, include_patterns):
return False
if pattern_match(filepath, ignore_patterns):
class FilterRules:
def __init__(self, include_patterns: list, exclude_patterns: list, include_rules_file: str | Path = "saist.include", exclude_rules_file: str | Path = "saist.ignore"):
"""
Build the include/exclude matchers from rule files on disk plus command-line patterns.

include_patterns / exclude_patterns: extra patterns supplied on the command line.
    May be None or empty. Each item is itself iterated and flattened, so a nested
    list (one inner list per occurrence of the flag) is expected.
include_rules_file / exclude_rules_file: paths of the rule files. A file that does
    not exist contributes no patterns.
"""
# Load initial rules from disk
self.include_patterns = self.__load_rule_file(include_rules_file)
self.exclude_patterns = self.__load_rule_file(exclude_rules_file)

logger.debug(f"Processing inclusion rules from {include_rules_file} and exclusion rules from {exclude_rules_file}")

if not self.include_patterns:
# If the include rules file yielded nothing, fall back to one "*<ext>" pattern per entry in DEFAULT_EXTENSIONS (e.g. "*.c")
logger.debug("No saist.include, using defaults")
self.include_patterns = [f"*{ext}" for ext in DEFAULT_EXTENSIONS]

# Extend list of patterns loaded from disk / defaults with additional patterns from CLI arguments
# The list comprehensions here flatten the pattern arguments into a single list as argparse will return a nested list
if include_patterns:
self.include_patterns.extend([pattern for item in include_patterns for pattern in item])

if exclude_patterns:
self.exclude_patterns.extend([pattern for item in exclude_patterns for pattern in item])

logger.debug(f"include_patterns: {self.include_patterns}\nignore_patterns:{self.exclude_patterns}")

# Convert include/exclude pattern lists into a gitignore format for parsing with gitignore_parser
exclude_gitignore_str = "\n".join(self.exclude_patterns)
include_gitignore_str = "\n".join(self.include_patterns)

# Define functions for checking filenames against exclusion/inclusion lists in gitignore format
# Each matcher is anchored at the directory containing its rules file
self.__exclusion_matches = parse_gitignore_str(exclude_gitignore_str, base_dir=Path(exclude_rules_file).parent)
self.__inclusion_matches = parse_gitignore_str(include_gitignore_str, base_dir=Path(include_rules_file).parent)

def __load_rule_file(self, file_path: str | Path):
"""
Load an exclude/include rules file from disk.

Returns the file's lines exactly as produced by readlines() (line endings are
kept, nothing is stripped or filtered), or an empty list if the file does not exist.
"""
file_path = Path(file_path)
if file_path.exists():
with open(file_path, 'r') as fp:
return fp.readlines()
else:
return []

def file_exceeds_line_length_limit(self, file_content: str, patch_text: str, max_line_length: int = 1000) -> bool:
"""
Check whether any line of the file content or of the patch text is longer than max_line_length.

Returns True if at least one line exceeds the limit, False if every line in both
file_content and patch_text is within it.
"""
# Scan the full file content first
for line in file_content.splitlines():
if len(line) > max_line_length:
return True

# Then scan the patch text as well
for line in patch_text.splitlines():
if len(line) > max_line_length:
return True

return False
return True

include_patterns = load_patterns("saist.include")
if not include_patterns:
# Fallback to extension-based glob patterns like **/*.py
include_patterns = [f"*{ext}" for ext in DEFAULT_EXTENSIONS]
ignore_patterns = load_patterns("saist.ignore")
def filename_included(self, filepath: str) -> bool:
"""
Returns True if the file matches the include rules and does not match the exclude rules.

The exclude rules are checked first, so a file matching both sets of rules is rejected.
"""
if self.__exclusion_matches(filepath):
return False
if not self.__inclusion_matches(filepath):
return False

return True



logger.info(f"include_patterns: {include_patterns}\nignore_patterns:{ignore_patterns}")
Loading