diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 0000000..087644d --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,440 @@ +# Contributing to Subzy + +Thank you for your interest in contributing to Subzy! This document provides guidelines and instructions for contributing. + +## Table of Contents + +- [Getting Started](#getting-started) +- [Development Setup](#development-setup) +- [Pre-commit Hooks](#pre-commit-hooks) +- [Testing Guidelines](#testing-guidelines) +- [Logging Guidelines](#logging-guidelines) +- [Code Style](#code-style) +- [Pull Request Process](#pull-request-process) + +--- + +## Getting Started + +1. Fork the repository +2. Clone your fork: `git clone https://github.com/YOUR_USERNAME/subzy.git` +3. Add upstream remote: `git remote add upstream https://github.com/LukaSikic/subzy.git` +4. Create a feature branch: `git checkout -b feature/your-feature-name` + +--- + +## Development Setup + +### Prerequisites + +- Go 1.21 or later +- Git +- (Optional) golangci-lint for linting +- (Optional) pre-commit for automated checks + +### Install Dependencies + +```bash +# Install Go dependencies +go mod download + +# Install development tools +go install golang.org/x/tools/cmd/goimports@latest +go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest + +# Install pre-commit (Python) +pip install pre-commit +# OR on macOS +brew install pre-commit +``` + +### Build + +```bash +# Build the binary +make build +# OR +go build -o subzy main.go + +# Run tests +make test +# OR +go test ./... + +# Run with coverage +make test-coverage +``` + +--- + +## Pre-commit Hooks + +We use pre-commit hooks to ensure code quality before commits. + +### Installation + +```bash +# Install pre-commit hooks +pre-commit install + +# Test hooks on all files +pre-commit run --all-files +``` + +### What the Hooks Do + +1. **go-fmt** - Formats Go code with `gofmt` +2. **go-imports** - Organizes imports with `goimports` +3. 
**go-vet** - Runs static analysis +4. **go-mod-tidy** - Ensures go.mod/go.sum are clean +5. **go-test-short** - Runs quick tests +6. **golangci-lint** - Comprehensive linting (if installed) +7. **check-added-large-files** - Prevents large files (>1MB) +8. **check-merge-conflict** - Detects merge conflict markers +9. **trailing-whitespace** - Removes trailing whitespace +10. **end-of-file-fixer** - Ensures files end with newline +11. **check-yaml/json** - Validates YAML/JSON files +12. **detect-private-key** - Prevents committing secrets + +### Skip Hooks (Emergency Only) + +```bash +# Skip pre-commit hooks (not recommended) +git commit --no-verify -m "your message" + +# Skip specific hook +SKIP=go-test-short git commit -m "your message" +``` + +--- + +## Testing Guidelines + +### Test Coverage + +- **Target**: 70%+ overall coverage +- **Requirement**: All new code must have tests +- **Run coverage**: `make test-coverage` + +### Writing Tests + +#### Unit Tests + +```go +// runner/example_test.go +package runner + +import "testing" + +func TestFunctionName(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "normal case", + input: "test", + expected: "expected", + }, + // Add more test cases + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := FunctionName(tt.input) + if result != tt.expected { + t.Errorf("got %v, want %v", result, tt.expected) + } + }) + } +} +``` + +#### Integration Tests + +```go +func TestIntegrationScan(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + + // Create test server + ts := httptest.NewServer(...) + defer ts.Close() + + // Run test + config := &Config{...} + err := Process(context.Background(), config) + + // Assertions + if err != nil { + t.Fatalf("unexpected error: %v", err) + } +} +``` + +### Running Tests + +```bash +# All tests +go test ./... + +# With coverage +go test -cover ./... 
+ +# With race detector +go test -race ./... + +# Verbose output +go test -v ./... + +# Short tests only (skip integration) +go test -short ./... + +# Specific package +go test ./runner + +# Specific test +go test ./runner -run TestFunctionName +``` + +### Benchmarks + +```bash +# Run benchmarks +go test -bench=. ./runner + +# With memory stats +go test -bench=. -benchmem ./runner + +# Compare benchmarks +go test -bench=. ./runner > old.txt +# Make changes +go test -bench=. ./runner > new.txt +benchstat old.txt new.txt +``` + +--- + +## Logging Guidelines + +### Using Structured Logging + +We use `zerolog` for structured logging. **Never use `fmt.Println` in production code.** + +```go +// Import +import "github.com/rs/zerolog/log" + +// Info level +logger.Info(). + Str("subdomain", subdomain). + Int("status_code", statusCode). + Msg("Checking subdomain") + +// Debug level +logger.Debug(). + Str("url", url). + Dur("duration", duration). + Msg("HTTP request completed") + +// Warning +logger.Warn(). + Str("subdomain", subdomain). + Err(err). + Msg("Retrying after error") + +// Error +logger.Error(). + Str("subdomain", subdomain). + Str("engine", engine). + Msg("Vulnerability detected") + +// With additional context +logger.Info(). + Str("subdomain", subdomain). + Int("attempt", attempt). + Dur("backoff", backoff). + Bool("success", success). 
+ Msg("Operation result") +``` + +### Log Levels + +- **debug**: Detailed diagnostic information +- **info**: General informational messages +- **warn**: Warning messages (non-critical issues) +- **error**: Error messages (failures) + +### When to Log + +- **DO**: Log important state changes, errors, warnings +- **DO**: Log with context (subdomain, engine, status) +- **DO**: Use appropriate log levels +- **DON'T**: Log in tight loops without throttling +- **DON'T**: Log sensitive information (passwords, tokens) +- **DON'T**: Use `fmt.Println` in production code + +--- + +## Code Style + +### Go Style Guide + +Follow the [Uber Go Style Guide](https://github.com/uber-go/guide/blob/master/style.md) and [Effective Go](https://golang.org/doc/effective_go.html). + +### Key Points + +1. **Formatting**: Use `gofmt` (enforced by pre-commit) +2. **Imports**: Use `goimports` (enforced by pre-commit) +3. **Names**: Use camelCase for private, PascalCase for exported +4. **Comments**: Comment exported functions/types +5. **Errors**: Wrap errors with context +6. **Concurrency**: Use channels and goroutines appropriately + +### Example + +```go +// Good +func ProcessSubdomain(ctx context.Context, subdomain string) error { + logger.Debug().Str("subdomain", subdomain).Msg("Processing") + + _, err := checkVulnerability(ctx, subdomain) + if err != nil { + return fmt.Errorf("processing %s: %w", subdomain, err) + } + + return nil +} + +// Bad +func process_subdomain(subdomain string) error { + fmt.Println("Processing:", subdomain) // Don't use fmt.Println + _, err := checkVulnerability(subdomain) // Missing context + if err != nil { + return err // No context wrapping + } + return nil +} +``` + +--- + +## Pull Request Process + +### Before Submitting + +1. **Run pre-commit checks**: `pre-commit run --all-files` +2. **Run all tests**: `make test` +3. **Check coverage**: `make test-coverage` +4. **Run linter**: `make lint` or `golangci-lint run` +5. 
**Update documentation** if adding features +6. **Add/update tests** for your changes + +### PR Checklist + +- [ ] Tests added/updated +- [ ] Test coverage maintained/improved +- [ ] Documentation updated +- [ ] CHANGELOG.md updated (for user-facing changes) +- [ ] Pre-commit hooks pass +- [ ] All CI checks pass +- [ ] No merge conflicts +- [ ] Commits are meaningful and well-formatted + +### Commit Messages + +Follow [Conventional Commits](https://www.conventionalcommits.org/): + +``` +feat: add retry logic with exponential backoff +fix: resolve race condition in worker pool +docs: update README with new flags +test: add integration tests for DNS checking +perf: optimize fingerprint matching with Aho-Corasick +refactor: extract logger initialization to separate function +chore: update dependencies +``` + +### PR Title Format + +``` +[Type] Brief description + +Examples: +[Feature] Add Graylog logging integration +[Fix] Resolve memory leak in worker pool +[Docs] Update installation instructions +[Performance] Optimize fingerprint matching +``` + +### PR Description Template + +```markdown +## Description +Brief description of what this PR does. + +## Type of Change +- [ ] Bug fix (non-breaking change which fixes an issue) +- [ ] New feature (non-breaking change which adds functionality) +- [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) +- [ ] Documentation update + +## Testing +Describe the tests you ran and how to reproduce. + +## Checklist +- [ ] Pre-commit hooks pass +- [ ] Tests pass locally +- [ ] Added/updated tests +- [ ] Updated documentation +- [ ] Updated CHANGELOG.md +``` + +--- + +## Development Workflow + +### Daily Development + +```bash +# Update from upstream +git fetch upstream +git rebase upstream/master + +# Create feature branch +git checkout -b feature/my-feature + +# Make changes and test +go test ./... 
+ +# Commit (triggers pre-commit hooks) +git commit -m "feat: my feature" + +# Push to your fork +git push origin feature/my-feature + +# Create PR on GitHub +``` + +### Code Review + +- Address all review comments +- Keep the PR focused and small +- Respond to feedback promptly +- Update the PR with requested changes + +--- + +## Questions? + +- Check existing issues: https://github.com/LukaSikic/subzy/issues +- Read documentation: README.md, IMPLEMENTATION_PLAN.md +- Ask in PR comments + +--- + +## License + +By contributing, you agree that your contributions will be licensed under the GPLv2 License. diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..50f1346 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,119 @@ +name: CI + +on: + push: + branches: [ master, main ] + pull_request: + branches: [ master, main ] + +jobs: + test: + name: Test + runs-on: ubuntu-latest + strategy: + matrix: + go-version: ['1.21', '1.22', '1.23'] + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: ${{ matrix.go-version }} + + - name: Cache Go modules + uses: actions/cache@v4 + with: + path: | + ~/.cache/go-build + ~/go/pkg/mod + key: ${{ runner.os }}-go-${{ matrix.go-version }}-${{ hashFiles('**/go.sum') }} + restore-keys: | + ${{ runner.os }}-go-${{ matrix.go-version }}- + + - name: Download dependencies + run: go mod download + + - name: Verify dependencies + run: go mod verify + + - name: Run go vet + run: go vet ./... + + - name: Run tests + run: go test -v -race -coverprofile=coverage.out -covermode=atomic ./... 
+ + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 + with: + file: ./coverage.out + flags: unittests + name: codecov-umbrella + + lint: + name: Lint + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.21' + + - name: Run golangci-lint + uses: golangci/golangci-lint-action@v4 + with: + version: latest + args: --timeout=5m + + build: + name: Build + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.21' + + - name: Build + run: go build -v -o subzy main.go + + - name: Test binary + run: ./subzy version + + security: + name: Security Scan + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: '1.21' + + - name: Run Gosec Security Scanner + uses: securego/gosec@master + with: + args: '-no-fail -fmt sarif -out results.sarif ./...' + + - name: Upload SARIF file + uses: github/codeql-action/upload-sarif@v3 + with: + sarif_file: results.sarif + + - name: Run govulncheck + run: | + go install golang.org/x/vuln/cmd/govulncheck@latest + govulncheck ./... 
diff --git a/.gitignore b/.gitignore index 2900cad..2567ece 100644 --- a/.gitignore +++ b/.gitignore @@ -153,3 +153,10 @@ build/* *.syso .idea + +# Binary +subzy + +# Test coverage files +coverage.out +coverage.html diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 0000000..fc9b3d5 --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,89 @@ +run: + timeout: 5m + tests: true + modules-download-mode: readonly + +linters: + enable: + - bodyclose + - dogsled + - errcheck + - goconst + - gocritic + - gofmt + - goimports + - goprintffuncname + - gosec + - gosimple + - govet + - ineffassign + - misspell + - nakedret + - revive + - staticcheck + - stylecheck + - typecheck + - unconvert + - unparam + - unused + - whitespace + +linters-settings: + govet: + check-shadowing: true + gofmt: + simplify: true + gocritic: + enabled-tags: + - diagnostic + - experimental + - opinionated + - performance + - style + errcheck: + check-type-assertions: true + check-blank: true + goconst: + min-len: 3 + min-occurrences: 3 + misspell: + locale: US + revive: + rules: + - name: exported + severity: warning + disabled: false + - name: package-comments + severity: warning + disabled: true + - name: unexported-return + severity: warning + disabled: false + +issues: + exclude-rules: + # Exclude some linters from running on tests files + - path: _test\.go + linters: + - goconst + - errcheck + - gosec + + # Maximum issues count per one linter + max-issues-per-linter: 50 + + # Maximum count of issues with the same text + max-same-issues: 3 + + # Show only new issues + new: false + + # Fix found issues (if supported by the linter) + fix: false + +output: + format: colored-line-number + print-issued-lines: true + print-linter-name: true + uniq-by-line: true + sort-results: true diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..1752f67 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,81 @@ +repos: + - repo: local + hooks: + # Go 
formatting + - id: go-fmt + name: Go Format + entry: gofmt -w + language: system + files: \.go$ + description: Run gofmt on Go files + + # Go imports + - id: go-imports + name: Go Imports + entry: bash -c 'goimports -w $(find . -type f -name "*.go" | grep -v vendor)' + language: system + files: \.go$ + description: Run goimports on Go files + pass_filenames: false + + # Go vet + - id: go-vet + name: Go Vet + entry: go vet ./... + language: system + pass_filenames: false + description: Run go vet + + # Go mod tidy + - id: go-mod-tidy + name: Go Mod Tidy + entry: go mod tidy + language: system + pass_filenames: false + description: Ensure go.mod and go.sum are tidy + + # Go test (short) + - id: go-test-short + name: Go Test (short) + entry: go test -short ./... + language: system + pass_filenames: false + description: Run short tests + + # golangci-lint (if installed) + - id: golangci-lint + name: golangci-lint + entry: bash -c 'if command -v golangci-lint >/dev/null 2>&1; then golangci-lint run; else echo "golangci-lint not installed, skipping"; fi' + language: system + pass_filenames: false + description: Run golangci-lint if available + + # Check for large files + - id: check-added-large-files + name: Check for large files + entry: bash -c 'for file in $(git diff --cached --name-only --diff-filter=A); do size=$(wc -c < "$file" 2>/dev/null || echo 0); if [ "$size" -gt 1048576 ]; then echo "Error: $file is larger than 1MB"; exit 1; fi; done' + language: system + pass_filenames: false + description: Prevent committing files larger than 1MB + + # Check for merge conflicts + - id: check-merge-conflict + name: Check for merge conflicts + entry: bash -c 'if git diff --cached | grep -q "^<<<<<<<\\|^=======\\|^>>>>>>>"; then echo "Error: Merge conflict markers found"; exit 1; fi' + language: system + pass_filenames: false + description: Check for merge conflict markers + + # Standard pre-commit hooks + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.5.0 + 
hooks: + - id: trailing-whitespace + args: [--markdown-linebreak-ext=md] + - id: end-of-file-fixer + - id: check-yaml + - id: check-json + - id: check-added-large-files + args: ['--maxkb=1024'] + - id: check-merge-conflict + - id: detect-private-key diff --git a/AUDIT_REPORT.md b/AUDIT_REPORT.md new file mode 100644 index 0000000..29485be --- /dev/null +++ b/AUDIT_REPORT.md @@ -0,0 +1,551 @@ +# Comprehensive Subzy Application Audit Report + +**Date**: 2025-11-10 +**Auditor**: Claude Code +**Version Audited**: v1.1.0 +**Go Version**: 1.19 (System: 1.24.7) + +## Executive Summary + +Subzy is a subdomain takeover detection tool written in Go. While the core functionality is solid, the application has **zero test coverage**, several **critical bugs**, **outdated dependencies**, and multiple opportunities for **performance and architectural improvements**. + +--- + +## 1. CRITICAL BUGS 🔴 + +### 1.1 Error Return Bug (runner/download.go:50) +**Location**: `runner/download.go:50` +**Severity**: HIGH +**Issue**: Error is created but not returned +```go +_, err = io.Copy(out, resp.Body) +if err != nil { + fmt.Errorf("downloadFingerprints: %v", err) // BUG: Not returned! +} +return nil // Always returns nil even on error +``` +**Impact**: Silent failures during fingerprint downloads + +### 1.2 Race Condition (runner/process.go:44-53) +**Location**: `runner/process.go:44-53` +**Severity**: HIGH +**Issue**: Concurrent writes to slice without mutex protection +```go +var results []*subdomainResult +go func() { + for r := range resCh { + // ... + results = append(results, r) // RACE CONDITION + } +}() +``` +**Impact**: Potential data corruption or crashes under high concurrency + +### 1.3 Missing Discussion Field (runner/helpers.go:26 & process.go:99) +**Location**: `runner/helpers.go:26` and `runner/process.go:99` +**Severity**: MEDIUM +**Issue**: `Discussion` field defined in struct but never populated in JSON output +```go +type subdomainResult struct { + // ... 
+ Discussion string `json:"discussion"` // Never populated! +} +``` +**Impact**: Incomplete JSON output + +--- + +## 2. MISSING TESTS ⚠️ + +### Test Coverage: **0%** + +**No test files exist in the entire codebase:** +- No `*_test.go` files +- No test framework configured +- No CI/CD pipeline for automated testing +- No code coverage reporting + +**Critical Functions Without Tests:** +1. `checkSubdomain()` - Core vulnerability detection +2. `matchResponse()` - Fingerprint matching logic +3. `readSubdomains()` - File parsing +4. `downloadFingerprints()` - Network operations +5. `Process()` - Main orchestration logic + +**Recommended Test Priorities:** +1. Unit tests for `matchResponse()` with various fingerprint scenarios +2. Integration tests for HTTP client behavior +3. Table-driven tests for URL validation +4. Mock HTTP server tests for end-to-end flow +5. Concurrency tests to catch race conditions +6. Error handling tests for all error paths + +--- + +## 3. OUTDATED DEPENDENCIES 📦 + +**System Go Version**: 1.24.7 (latest) +**Project Go Version**: 1.19 (outdated by 5 major versions) + +### Dependencies Needing Updates: + +| Package | Current | Latest | Delta | +|---------|---------|--------|-------| +| `github.com/spf13/cobra` | v1.6.1 | v1.10.1 | +4 minor | +| `github.com/inconshreveable/mousetrap` | v1.0.1 | v1.1.0 | +1 minor | +| `github.com/spf13/pflag` | v1.0.5 | v1.0.10 | +5 patch | +| `github.com/cpuguy83/go-md2man/v2` | v2.0.2 | v2.0.7 | +5 patch | +| `gopkg.in/check.v1` | 2016 version | v1.0.0-20201130... | Major lag | + +### Recommendations: +1. Update Go version to 1.21+ (minimum) +2. Run `go get -u ./...` to update all dependencies +3. Update `go.mod` to use Go 1.21+ +4. Test thoroughly after updates + +--- + +## 4. PERFORMANCE ISSUES & INEFFICIENCIES ⚡ + +### 4.1 Duplicate Fingerprint Loading (runner/process.go:16, 22) +```go +func Process(config *Config) error { + fingerprints, err := Fingerprints() // Load 1 + // ... 
+ config.loadFingerprints() // Load 2 (calls Fingerprints again!) +``` +**Impact**: File read and JSON parsing done twice on every run + +### 4.2 Sequential Fingerprint Matching (runner/worker.go:47-60) +```go +func (c *Config) matchResponse(body string) Result { + for _, fingerprint := range c.fingerprints { // O(n) scan + if strings.Contains(body, fingerprint.Fingerprint) { +``` +**Issue**: Linear search through 44 fingerprints for every subdomain +**Impact**: With 1000 subdomains, this performs 44,000 string searches +**Solution**: Pre-compile fingerprints into optimized data structure (trie, regex, or Aho-Corasick) + +### 4.3 Inefficient Channel Buffering (runner/process.go:38) +```go +subdomainCh := make(chan string, config.Concurrency+5) // Why +5? +``` +**Issue**: Arbitrary buffer size without justification +**Better**: `config.Concurrency * 2` or match workload size + +### 4.4 No HTTP Connection Pooling Configuration (runner/config.go:24-36) +```go +client := &http.Client{ + Timeout: timeout, + Transport: tr, +} +``` +**Missing**: Connection pool tuning +```go +// Recommended additions: +tr.MaxIdleConns = 100 +tr.MaxIdleConnsPerHost = config.Concurrency +tr.IdleConnTimeout = 90 * time.Second +``` + +### 4.5 Unused Function (runner/process.go:117-119) +```go +func generator(subdomain string, subdomainCh chan string) { + subdomainCh <- subdomain +} +``` +**Issue**: Dead code never called +**Action**: Remove + +### 4.6 Response Body Reading (runner/worker.go:37) +```go +body, err := io.ReadAll(resp.Body) +``` +**Issue**: No size limit - vulnerable to memory exhaustion +**Solution**: Use `io.LimitReader` with reasonable max size (e.g., 1MB) + +--- + +## 5. 
CODE QUALITY IMPROVEMENTS 🔧 + +### 5.1 Error Handling Inconsistency +**Mixed Patterns:** +- `log.Fatalf()` in `process.go:125` (abrupt exit) +- `return error` in most other places (proper error bubbling) + +**Recommendation**: Use consistent error handling - prefer returning errors + +### 5.2 Missing Context Support +**Issue**: No cancellation or timeout control beyond HTTP timeout +**Example Use Case**: User presses Ctrl+C during long scan + +**Recommended Change:** +```go +func Process(ctx context.Context, config *Config) error { + // Use ctx.Done() for graceful shutdown +} +``` + +### 5.3 Boolean Comparison Anti-Pattern (runner/helpers.go:6) +```go +if setting == true { // Verbose + return "[ Yes ]" +} +``` +**Better:** +```go +if setting { + return "[ Yes ]" +} +``` + +### 5.4 URL Validation Logic (runner/worker.go:25) +```go +if isValidUrl(subdomain) == false { // Redundant comparison to a boolean literal +``` +**Better:** +```go +if !isValidUrl(subdomain) { +``` + +### 5.5 Missing Error Handling (cmd/root.go:14) +```go +func Execute() { + rootCmd.Execute() // Error ignored +} +``` +**Better:** +```go +func Execute() error { + return rootCmd.Execute() +} +``` + +### 5.6 Struct Field Naming (runner/fingerprints.go:15) +```go +False_Positive []string // Non-idiomatic snake_case +``` +**Better:** +```go +FalsePositive []string // Or FalsePositives for plural +``` + +--- + +## 6. 
SECURITY CONSIDERATIONS 🔒 + +### 6.1 Insecure Default TLS Configuration +```go +TLSClientConfig: &tls.Config{InsecureSkipVerify: !s.VerifySSL} +``` +**Issue**: Defaults to skipping SSL verification +**Risk**: MITM attacks, certificate validation bypass +**Recommendation**: Default to secure, require explicit flag for insecure + +### 6.2 No Rate Limiting +**Issue**: Can overwhelm targets or get IP banned +**Recommendation**: Add configurable rate limiting (requests/second) + +### 6.3 No User-Agent Configuration +**Issue**: Default Go user-agent may be blocked/flagged +**Recommendation**: Add configurable User-Agent header + +### 6.4 Hardcoded GitHub URL (runner/download.go:17) +```go +fingerprintPath = "https://raw.githubusercontent.com/..." +``` +**Issue**: No verification of downloaded content (no checksum, signature) +**Risk**: Supply chain attack vector +**Recommendation**: Add integrity verification + +--- + +## 7. MISSING FEATURES & ENHANCEMENTS 🚀 + +### 7.1 No Logging Framework +- Only `fmt.Printf` and `log.Fatalf` +- No log levels (debug, info, warn, error) +- No structured logging + +**Recommendation**: Add structured logging (e.g., `zerolog`, `zap`) + +### 7.2 No Progress Indicators +**Issue**: No feedback during long scans +**Recommendation**: Add progress bar (e.g., `progressbar` library) + +### 7.3 No Retry Logic +**Issue**: Transient network failures cause immediate failure +**Recommendation**: Add exponential backoff retry mechanism + +### 7.4 No Output Formats Beyond JSON +**Missing**: CSV, YAML, plain text, HTML reports +**Recommendation**: Support multiple output formats + +### 7.5 No Statistics/Summary +**Missing**: Total checked, vulnerable count, error count, duration +**Recommendation**: Print summary report at end + +### 7.6 No Input Validation +- No check for empty target lists +- No validation of file existence before reading +- No validation of concurrent worker count (could be 0 or negative) + +--- + +## 8. 
ARCHITECTURE IMPROVEMENTS 🏗️ + +### 8.1 Separate Concerns: HTTP Logic from Business Logic +**Current**: HTTP client mixed with fingerprint matching +**Better**: Create separate `HTTPClient` interface for testability + +### 8.2 Fingerprint Matching Optimization + +**Option A: Pre-compile Regex (if patterns are simple)** +```go +type CompiledFingerprint struct { + Regex *regexp.Regexp + FalsePositive []*regexp.Regexp + // ... +} +``` + +**Option B: Aho-Corasick Algorithm** +- Build trie of all fingerprint strings +- Single pass through response body +- O(n+m) instead of O(n*m) + +### 8.3 Plugin Architecture for Fingerprints +**Current**: Static JSON file +**Enhancement**: Support custom fingerprint sources: +- Local custom fingerprints +- Multiple remote sources +- User-defined fingerprints +- Community-contributed modules + +### 8.4 Result Storage Backend +**Current**: In-memory accumulation +**Enhancement**: Stream results to: +- Database (SQLite, PostgreSQL) +- Message queue (NATS, Kafka) +- External APIs +- Real-time webhooks + +--- + +## 9. FUTURE FEATURE OPPORTUNITIES 💡 + +### 9.1 Enhanced Scanning Modes +1. **Passive Mode**: DNS-only checks (no HTTP requests) +2. **Aggressive Mode**: Try multiple protocols, ports +3. **Stealth Mode**: Slower scanning with randomized delays +4. 
**Smart Mode**: Adaptive concurrency based on target response times + +### 9.2 DNS Integration +- Check CNAME records before HTTP requests +- Identify DNS-based takeovers (dangling CNAMEs) +- Cache DNS results to avoid repeated lookups + +### 9.3 Historical Tracking +- Store scan history +- Compare results over time +- Alert on newly vulnerable subdomains +- Track remediation progress + +### 9.4 Notification System +- Slack/Discord webhooks +- Email alerts +- PagerDuty integration +- Custom webhook support + +### 9.5 Multi-Tenant Support +- API server mode (RESTful API) +- Web dashboard +- User authentication +- Scheduled scans +- Role-based access control + +### 9.6 Machine Learning Enhancements +- Anomaly detection for unknown takeover patterns +- Confidence scoring for vulnerability likelihood +- Pattern learning from false positives +- Automatic fingerprint generation + +### 9.7 Cloud Provider Deep Integration +- AWS S3 specific checks (bucket policies) +- Azure Blob specific validations +- GCP Storage checks +- Direct cloud API validation (not just HTTP) + +### 9.8 Compliance & Reporting +- Generate compliance reports (SOC2, ISO27001) +- Executive summaries +- Trend analysis +- Risk scoring +- Remediation workflows + +--- + +## 10. DEPENDENCY & TOOLING ENHANCEMENTS 🛠️ + +### 10.1 Missing Development Tools + +**Recommended Additions:** + +1. **Makefile** for common tasks: +```makefile +.PHONY: test build lint +test: + go test -v -race -cover ./... +build: + go build -o subzy main.go +lint: + golangci-lint run +``` + +2. **golangci-lint** configuration (`.golangci.yml`) +3. **Pre-commit hooks** (`.pre-commit-config.yaml`) +4. **GitHub Actions CI/CD** (`.github/workflows/`) +5. **Dependency scanning** (Dependabot, Renovate) +6. **Dockerfile** for containerized deployment +7. 
**Docker Compose** for testing environment + +### 10.2 Documentation Gaps +- No CONTRIBUTING.md +- No CHANGELOG.md +- No API documentation (if adding server mode) +- No architecture diagrams +- No performance benchmarks + +--- + +## 11. PERFORMANCE BENCHMARKS & TARGETS 📊 + +### Current Performance (Estimated) +- **Throughput**: ~10-100 subdomains/second (depends on concurrency) +- **Memory**: Unbounded (no limits on response size or result accumulation) +- **CPU**: Low (mostly I/O bound) + +### Recommended Targets +- **Throughput**: 500+ subdomains/second with optimized matching +- **Memory**: <100MB for 10,000 subdomains +- **Latency**: <100ms per subdomain (network dependent) +- **Accuracy**: <0.1% false positive rate + +### Suggested Benchmarks to Add +```go +func BenchmarkMatchResponse(b *testing.B) { + // Benchmark fingerprint matching +} + +func BenchmarkConcurrentProcessing(b *testing.B) { + // Benchmark parallel worker pool +} +``` + +--- + +## 12. PRIORITIZED ACTION PLAN 📋 + +### Phase 1: Critical Fixes (Immediate - Week 1) +1. Fix error return bug in `download.go:50` +2. Fix race condition with mutex in `process.go` +3. Remove duplicate fingerprint loading +4. Add Discussion field population +5. Update Go version to 1.21+ +6. Update all dependencies + +### Phase 2: Testing Foundation (Week 2-3) +1. Add unit tests for core functions (70%+ coverage target) +2. Add integration tests +3. Set up GitHub Actions CI/CD +4. Add golangci-lint +5. Add pre-commit hooks + +### Phase 3: Performance Optimizations (Week 4-5) +1. Optimize fingerprint matching (Aho-Corasick or trie) +2. Add response body size limits +3. Improve HTTP connection pooling +4. Add benchmarks and profiling +5. Remove dead code + +### Phase 4: Quality & Security (Week 6-7) +1. Add structured logging +2. Add progress indicators +3. Add retry logic with exponential backoff +4. Add context support for cancellation +5. Add input validation +6. Secure TLS defaults +7. 
Add rate limiting + +### Phase 5: Feature Enhancements (Week 8-10) +1. Add DNS checking before HTTP +2. Add multiple output formats +3. Add statistics/summary reporting +4. Add configurable User-Agent +5. Add plugin system for custom fingerprints + +### Phase 6: Advanced Features (Future) +1. Add API server mode +2. Add web dashboard +3. Add historical tracking +4. Add notification integrations +5. Add ML-based detection + +--- + +## 13. CODE METRICS SUMMARY 📈 + +``` +Total Lines of Code: ~501 Go LOC +Test Coverage: 0% +Number of Tests: 0 +Cyclomatic Complexity: Low-Medium +Technical Debt Ratio: Medium-High +Maintainability Index: Good (clean structure) +Bug Density: 3 critical bugs / 501 LOC = 0.6% +Dependency Health: 5 outdated dependencies +Security Issues: 2 medium severity +Performance Issues: 6 identified optimizations +``` + +--- + +## 14. FINAL RECOMMENDATIONS 🎯 + +### Immediate Actions (Do Now): +1. **Fix the critical bugs** (download.go, race condition) +2. **Add basic unit tests** (at minimum for core functions) +3. **Update dependencies** and Go version + +### Short-term Improvements (Next Sprint): +1. **Set up CI/CD pipeline** with automated testing +2. **Add structured logging** for better observability +3. **Optimize fingerprint matching** for 5-10x performance gain +4. **Add input validation** and error handling improvements + +### Long-term Vision (Next Quarter): +1. **Achieve 80%+ test coverage** +2. **Build plugin architecture** for extensibility +3. **Add DNS integration** for more accurate detection +4. **Create API/dashboard** for enterprise use cases + +--- + +## Conclusion + +Subzy is a **well-structured security tool** with a **clean separation of concerns** and **solid core functionality**. 
However, it suffers from: +- **Zero test coverage** (highest risk) +- **Several critical bugs** affecting reliability +- **Outdated dependencies** (security & compatibility risk) +- **Performance inefficiencies** limiting scalability +- **Missing modern features** expected in security tools + +**With focused effort on the prioritized action plan, Subzy can evolve into a production-grade, enterprise-ready subdomain takeover detection platform.** + +**Overall Grade: C+** (Functional but needs quality improvements) +- Functionality: B +- Code Quality: C+ +- Test Coverage: F +- Performance: C +- Security: B- +- Maintainability: B- diff --git a/GRAYLOG_SETUP.md b/GRAYLOG_SETUP.md new file mode 100644 index 0000000..56bdaa0 --- /dev/null +++ b/GRAYLOG_SETUP.md @@ -0,0 +1,536 @@ +# Graylog Integration Guide + +This guide explains how to set up and use Graylog logging with Subzy. + +## Table of Contents + +- [Quick Start](#quick-start) +- [Local Development Setup](#local-development-setup) +- [Production Setup](#production-setup) +- [Usage Examples](#usage-examples) +- [Dashboard Configuration](#dashboard-configuration) +- [Troubleshooting](#troubleshooting) + +--- + +## Quick Start + +### 1. Start Local Graylog (Docker) + +```bash +# Start Graylog, MongoDB, and Elasticsearch +docker-compose up -d + +# Wait for services to start (about 60 seconds) +docker-compose logs -f graylog + +# When you see "Graylog server up and running", press Ctrl+C +``` + +### 2. Configure Graylog Input + +1. Open http://localhost:9000 +2. Login: `admin` / `admin` +3. Go to **System** → **Inputs** +4. Select **GELF UDP** from dropdown +5. Click **Launch new input** +6. Configure: + - **Title**: Subzy + - **Port**: 12201 (default) + - **Bind address**: 0.0.0.0 +7. Click **Save** + +### 3. 
Run Subzy with Graylog + +```bash +# Run with Graylog logging +./subzy run \ + --target example.com \ + --log-level debug \ + --graylog-host localhost:12201 + +# View logs in Graylog web UI +``` + +--- + +## Local Development Setup + +### Prerequisites + +- Docker and Docker Compose +- 4GB+ RAM available for containers +- Ports 9000, 12201 available + +### Full Setup + +```bash +# Clone repository +git clone https://github.com/LukaSikic/subzy.git +cd subzy + +# Start Graylog stack +docker-compose up -d + +# Check services are running +docker-compose ps + +# Expected output: +# subzy-graylog Up 9000/tcp, 12201/udp +# subzy-mongodb Up 27017/tcp +# subzy-elasticsearch Up 9200/tcp + +# Wait for Graylog to be ready +docker-compose logs -f graylog +# Look for: "Graylog server up and running" + +# Build subzy +make build + +# Test logging +./subzy run --target test.example.com --graylog-host localhost:12201 --log-level debug +``` + +### Accessing Graylog + +- **URL**: http://localhost:9000 +- **Username**: admin +- **Password**: admin + +--- + +## Production Setup + +### On Existing Graylog Server + +If you already have a Graylog server: + +```bash +# Run subzy with your Graylog server +./subzy run \ + --targets domains.txt \ + --graylog-host graylog.company.com:12201 \ + --graylog-app subzy-prod \ + --log-level info +``` + +### Security Considerations + +1. **Change default password** in Graylog +2. **Update GRAYLOG_PASSWORD_SECRET** in docker-compose.yml +3. **Use TLS** for production GELF connections +4. **Restrict network access** to Graylog ports +5. **Enable authentication** if exposing publicly + +### Production docker-compose.yml + +```yaml +version: '3' + +services: + graylog: + image: graylog/graylog:5.0 + environment: + - GRAYLOG_PASSWORD_SECRET=${GRAYLOG_SECRET} # From .env file + - GRAYLOG_ROOT_PASSWORD_SHA2=${GRAYLOG_PASSWORD_HASH} + - GRAYLOG_HTTP_EXTERNAL_URI=https://graylog.company.com/ + # ... 
other settings + volumes: + - /opt/graylog/data:/usr/share/graylog/data + ports: + - "127.0.0.1:9000:9000" # Only localhost access + - "12201:12201/udp" # GELF input +``` + +--- + +## Usage Examples + +### Basic Logging + +```bash +# Console logging (default) +./subzy run --target example.com + +# JSON logging to stdout +./subzy run --target example.com --log-format json + +# Debug level logging +./subzy run --target example.com --log-level debug +``` + +### Graylog Integration + +```bash +# Send logs to Graylog only +./subzy run \ + --targets domains.txt \ + --graylog-host graylog.internal:12201 \ + --graylog-app subzy-scanner \ + --log-format json + +# Graylog + console output +./subzy run \ + --targets domains.txt \ + --graylog-host localhost:12201 \ + --log-format console + +# Graylog + file logging +./subzy run \ + --targets domains.txt \ + --graylog-host localhost:12201 \ + --log-file \ + --log-file-path /var/log/subzy/scan.log +``` + +### Multi-Output Logging + +```bash +# Console + Graylog + File +./subzy run \ + --targets domains.txt \ + --log-level info \ + --log-format console \ + --graylog-host localhost:12201 \ + --graylog-app subzy \ + --log-file \ + --log-file-path subzy-$(date +%Y%m%d).log +``` + +### Production Scanning + +```bash +# Production scan with all logging +./subzy run \ + --targets /opt/subzy/domains.txt \ + --output /opt/subzy/results/scan-$(date +%Y%m%d-%H%M).json \ + --graylog-host graylog.internal:12201 \ + --graylog-app subzy-prod \ + --log-level info \ + --log-file \ + --log-file-path /var/log/subzy/scan-$(date +%Y%m%d-%H%M).log \ + --concurrency 50 \ + --timeout 15 \ + --verify_ssl +``` + +--- + +## Dashboard Configuration + +### Creating Dashboards in Graylog + +#### 1. Scan Overview Dashboard + +1. Go to **Dashboards** → **Create dashboard** +2. Name: "Subzy Scan Overview" +3. 
Add widgets: + +**Total Scans** (Count) +- **Search**: `app:subzy AND message:"Starting subdomain takeover scan"` +- **Type**: Count +- **Time Range**: Last 24 hours + +**Vulnerable Subdomains** (Count) +- **Search**: `app:subzy AND message:"Vulnerable subdomain detected"` +- **Type**: Count +- **Time Range**: Last 24 hours + +**Scan Duration** (Stats) +- **Search**: `app:subzy AND message:"Scan completed"` +- **Type**: Statistics +- **Field**: `duration_ms` + +**Vulnerabilities by Engine** (Pie Chart) +- **Search**: `app:subzy AND status:vulnerable` +- **Type**: Pie chart +- **Field**: `engine` + +#### 2. Real-Time Monitoring Dashboard + +**Live Vulnerability Feed** (Message Table) +- **Search**: `app:subzy AND level:error` +- **Type**: Message table +- **Fields**: timestamp, subdomain, engine, documentation +- **Sort**: Timestamp desc + +**Error Rate** (Line Chart) +- **Search**: `app:subzy AND level:error` +- **Type**: Line chart +- **Interval**: 5 minutes + +**Scan Progress** (Count) +- **Search**: `app:subzy AND message:"Subdomain check completed"` +- **Type**: Count +- **Time Range**: Last 1 hour + +### Example Graylog Queries + +``` +# All vulnerabilities +app:subzy AND status:vulnerable + +# Vulnerabilities for specific service +app:subzy AND status:vulnerable AND engine:"AWS S3" + +# HTTP errors +app:subzy AND status:"http error" + +# Specific subdomain +app:subzy AND subdomain:"test.example.com" + +# High-level errors only +app:subzy AND level:error + +# Scans from specific instance +app:subzy AND graylog_app:"subzy-prod" + +# Time range +app:subzy AND timestamp:[2025-01-01 TO 2025-01-31] +``` + +### Alerts Configuration + +#### Alert: New Vulnerability Detected + +1. Go to **Alerts** → **Event Definitions** → **Create Event Definition** +2. 
Configure: + - **Title**: New Subdomain Vulnerability + - **Priority**: High + - **Condition**: Filter & Aggregation + - **Search Query**: `app:subzy AND status:vulnerable` + - **Aggregation**: count() + - **Threshold**: >= 1 + - **Time Range**: 5 minutes + - **Notification**: Email/Slack + +#### Alert: High Error Rate + +1. **Title**: Subzy High Error Rate +2. **Condition**: `app:subzy AND level:error` +3. **Aggregation**: count() +4. **Threshold**: >= 10 in 5 minutes + +--- + +## Structured Log Fields + +Subzy sends these structured fields to Graylog: + +| Field | Type | Description | Example | +|-------|------|-------------|---------| +| `app` | string | Application name | `subzy` | +| `level` | string | Log level | `info`, `error` | +| `subdomain` | string | Target subdomain | `test.example.com` | +| `status` | string | Check result | `vulnerable`, `http error` | +| `engine` | string | Service engine | `AWS S3`, `GitHub Pages` | +| `documentation` | string | Remediation URL | `https://...` | +| `discussion` | string | Discussion URL | `https://...` | +| `target_count` | int | Number of targets | `1000` | +| `fingerprint_count` | int | Fingerprints loaded | `44` | +| `concurrency` | int | Worker count | `10` | +| `timeout_seconds` | int | Request timeout | `10` | +| `output_file` | string | Result file | `results.json` | + +--- + +## Troubleshooting + +### Graylog Not Receiving Logs + +**Check 1: Graylog is running** +```bash +docker-compose ps +# All services should be "Up" +``` + +**Check 2: Input is configured** +```bash +# In Graylog UI: System → Inputs +# Should see "GELF UDP" input running on port 12201 +``` + +**Check 3: Port is accessible** +```bash +# Test UDP port +nc -u -v localhost 12201 +``` + +**Check 4: Firewall rules** +```bash +# Allow UDP 12201 +sudo ufw allow 12201/udp +``` + +### Logs Not Appearing in Graylog + +**Check Subzy is sending logs** +```bash +# Run with debug logging +./subzy run --target example.com --graylog-host 
localhost:12201 --log-level debug + +# Should not show connection errors +``` + +**Check Graylog logs** +```bash +docker-compose logs graylog | grep -i error +``` + +**Check Input statistics** +```bash +# In Graylog UI: System → Inputs → GELF UDP → Show received messages +# Should show incoming messages +``` + +### Connection Refused + +```bash +# Error: "failed to create Graylog writer: connection refused" + +# Solution 1: Check Graylog is running +docker-compose ps + +# Solution 2: Check correct host/port +# Use: localhost:12201 (local) +# Not: graylog:12201 (Docker internal) +``` + +### No Data in Dashboards + +```bash +# Check time range +# Graylog default: Last 5 minutes +# Change to: Last 24 hours + +# Check search query +# Ensure: app:subzy (not app:"subzy") + +# Check index +# System → Indices → Should see data in default index +``` + +--- + +## Performance Considerations + +### GELF UDP Performance + +- **UDP is fast** but can lose packets +- For production, consider **GELF TCP** (port 12201) +- UDP is fine for development and most use cases + +### Resource Usage + +**Graylog Stack Resources:** +- MongoDB: ~200MB RAM +- Elasticsearch: ~1GB RAM +- Graylog: ~1GB RAM +- Total: ~2.5GB RAM minimum + +**Subzy Logging Overhead:** +- Console: Minimal +- GELF UDP: < 1ms per log +- File: < 5ms per log +- Total impact: < 5% performance + +### Scaling Graylog + +For high-volume logging: + +1. **Increase Elasticsearch heap**: + ```yaml + ES_JAVA_OPTS: -Xms2g -Xmx2g + ``` + +2. **Add Elasticsearch nodes** for clustering + +3. **Use GELF TCP** for reliability + +4. 
**Enable Graylog processing buffers** + +--- + +## Integration Examples + +### With CI/CD Pipeline + +```yaml +# .gitlab-ci.yml +security_scan: + script: + - ./subzy run \ + --targets domains.txt \ + --graylog-host graylog.internal:12201 \ + --graylog-app "subzy-ci-${CI_PIPELINE_ID}" \ + --log-level info \ + --output results.json + - cat results.json +``` + +### With Kubernetes + +```yaml +# kubernetes/cronjob.yaml +apiVersion: batch/v1 +kind: CronJob +metadata: + name: subzy-scan +spec: + schedule: "0 2 * * *" # Daily at 2 AM + jobTemplate: + spec: + template: + spec: + containers: + - name: subzy + image: subzy:latest + args: + - run + - --targets + - /config/domains.txt + - --graylog-host + - graylog-service.logging:12201 + - --graylog-app + - subzy-k8s + - --log-level + - info + volumeMounts: + - name: domains + mountPath: /config +``` + +### With Ansible + +```yaml +# playbook.yml +- name: Run Subzy scan + hosts: scanner + tasks: + - name: Run subdomain scan + command: + cmd: > + /opt/subzy/subzy run + --targets /opt/subzy/domains.txt + --graylog-host {{ graylog_host }}:12201 + --graylog-app subzy-{{ inventory_hostname }} + --log-level info +``` + +--- + +## Additional Resources + +- [Graylog Documentation](https://docs.graylog.org/) +- [GELF Specification](https://docs.graylog.org/en/latest/pages/gelf.html) +- [Zerolog Documentation](https://github.com/rs/zerolog) +- [Subzy GitHub](https://github.com/LukaSikic/subzy) + +--- + +## Support + +For issues: +1. Check this guide +2. Review Graylog logs: `docker-compose logs graylog` +3. 
Open GitHub issue with logs and configuration diff --git a/IMPLEMENTATION_PLAN.md b/IMPLEMENTATION_PLAN.md new file mode 100644 index 0000000..15dc102 --- /dev/null +++ b/IMPLEMENTATION_PLAN.md @@ -0,0 +1,1058 @@ +# Subzy Implementation Plan - Phases 2-6 + +**Status**: In Progress +**Current Coverage**: 35.7% +**Target Coverage**: 70%+ +**Created**: 2025-11-10 + +--- + +## Phase 2: Testing Foundation (Week 2-3) ⏳ + +### Status: 60% Complete + +#### ✅ Completed +- [x] Add unit tests for core functions (35.7% coverage) +- [x] Set up GitHub Actions CI/CD +- [x] Add golangci-lint configuration + +#### 🔄 In Progress +- [ ] Increase test coverage to 70%+ (Current: 35.7%) + - [ ] Add tests for `process.go` (complex concurrency logic) + - [ ] Add tests for `download.go` (HTTP download & file operations) + - [ ] Add integration tests for end-to-end flows + - [ ] Add tests for cmd package (command handlers) + +#### 📋 TODO +- [ ] **Add pre-commit hooks** (Priority: High) + - [ ] Install pre-commit framework + - [ ] Create `.pre-commit-config.yaml` + - [ ] Configure hooks: + - `go fmt` - Format code + - `go vet` - Static analysis + - `golangci-lint` - Comprehensive linting + - `go test -short` - Run fast tests + - `go mod tidy` - Clean dependencies + - [ ] Document pre-commit setup in README + +#### Integration Tests TODO +- [ ] Create `runner/integration_test.go` + - [ ] Test full scan workflow with mock HTTP server + - [ ] Test concurrent processing with multiple subdomains + - [ ] Test JSON output generation + - [ ] Test error handling in full pipeline + - [ ] Test rate limiting behavior + - [ ] Test timeout handling + +#### Estimated Time: 1 week + +--- + +## Phase 3: Performance Optimizations (Week 4-5) 🚀 + +### Status: 40% Complete + +#### ✅ Completed +- [x] Add response body size limits (1MB) +- [x] Improve HTTP connection pooling +- [x] Remove dead code +- [x] Basic benchmarks created + +#### 📋 TODO + +### 3.1 Optimize Fingerprint Matching (Priority: 
Critical) + +**Current**: O(n*m) - Linear scan through all fingerprints for each response +**Target**: O(n+m) - Single pass with efficient matching + +#### Option A: Aho-Corasick Algorithm (Recommended) +```go +// File: runner/matcher.go +package runner + +import "github.com/cloudflare/ahocorasick" + +type FingerprintMatcher struct { + matcher *ahocorasick.Matcher + fingerprints map[int]Fingerprint // Map pattern ID to fingerprint +} + +func NewFingerprintMatcher(fingerprints []Fingerprint) *FingerprintMatcher { + // Build Aho-Corasick automaton from all fingerprint strings + // Single pass through response body to find all matches +} +``` + +**Tasks**: +- [ ] Add Aho-Corasick library dependency +- [ ] Create `runner/matcher.go` with new matching logic +- [ ] Implement `NewFingerprintMatcher()` to build trie +- [ ] Implement `Match(body string)` for O(n+m) matching +- [ ] Update `Config.matchResponse()` to use new matcher +- [ ] Add benchmarks comparing old vs new approach +- [ ] Verify no performance regression for small inputs +- [ ] Expected performance: 5-10x faster for typical workloads + +#### Option B: Compiled Regex Patterns (Alternative) +```go +type CompiledFingerprint struct { + Regex *regexp.Regexp + FalsePositive []*regexp.Regexp + Original Fingerprint +} +``` + +**Tasks**: +- [ ] Pre-compile all fingerprint patterns at startup +- [ ] Pre-compile all false positive patterns +- [ ] Use `regexp.FindString()` for matching +- [ ] Benchmark against Aho-Corasick + +### 3.2 Advanced Benchmarking & Profiling + +**Tasks**: +- [ ] Add CPU profiling support + - [ ] Create `--profile-cpu` flag + - [ ] Generate `cpu.pprof` files + - [ ] Document how to analyze with `go tool pprof` + +- [ ] Add memory profiling + - [ ] Create `--profile-mem` flag + - [ ] Generate `mem.pprof` files + - [ ] Identify memory bottlenecks + +- [ ] Add comprehensive benchmarks + - [ ] `BenchmarkFullScanWorkflow` - End-to-end scan + - [ ] `BenchmarkConcurrentProcessing` - Worker pool 
performance + - [ ] `BenchmarkFingerprint100Targets` - Realistic workload + - [ ] `BenchmarkFingerprint1000Targets` - Stress test + +- [ ] Performance regression tests in CI + - [ ] Add `benchstat` comparison in GitHub Actions + - [ ] Fail CI if performance degrades >20% + +#### Estimated Time: 2 weeks + +--- + +## Phase 4: Quality & Security (Week 6-7) 🔒 + +### Status: 30% Complete + +#### ✅ Completed +- [x] Add input validation (concurrency, timeout, file checks) +- [x] Add configurable User-Agent + +#### 📋 TODO + +### 4.1 Structured Logging with Graylog Integration (Priority: Critical) + +**Architecture**: +``` +Application → Zerolog → GELF Format → Graylog Server + ↓ + Console/File (development) +``` + +**Implementation**: + +#### Step 1: Add Zerolog Dependency +```bash +go get -u github.com/rs/zerolog +go get -u github.com/rs/zerolog/log +go get -u gopkg.in/Graylog2/go-gelf.v2/gelf +``` + +#### Step 2: Create Logger Package +```go +// File: runner/logger.go +package runner + +import ( + "io" + "os" + "gopkg.in/Graylog2/go-gelf.v2/gelf" + "github.com/rs/zerolog" + "github.com/rs/zerolog/log" +) + +type LogConfig struct { + Level string // debug, info, warn, error + Format string // json, console + GraylogHost string // e.g., "graylog.example.com:12201" + GraylogApp string // Application name + EnableFile bool // Log to file + FilePath string // Log file path +} + +func InitLogger(cfg LogConfig) (zerolog.Logger, error) { + var writers []io.Writer + + // Console output (development) + if cfg.Format == "console" { + writers = append(writers, zerolog.ConsoleWriter{ + Out: os.Stdout, + TimeFormat: "15:04:05", + }) + } + + // Graylog GELF output (production) + if cfg.GraylogHost != "" { + gelfWriter, err := gelf.NewUDPWriter(cfg.GraylogHost) + if err != nil { + return zerolog.Logger{}, err + } + gelfWriter.Facility = cfg.GraylogApp + writers = append(writers, gelfWriter) + } + + // File output + if cfg.EnableFile { + file, err := 
os.OpenFile(cfg.FilePath, + os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666) + if err != nil { + return zerolog.Logger{}, err + } + writers = append(writers, file) + } + + multi := zerolog.MultiLevelWriter(writers...) + logger := zerolog.New(multi).With(). + Timestamp(). + Str("app", cfg.GraylogApp). + Logger() + + // Set log level + level, err := zerolog.ParseLevel(cfg.Level) + if err != nil { + level = zerolog.InfoLevel + } + logger = logger.Level(level) + + return logger, nil +} +``` + +#### Step 3: Add Config Fields +```go +// File: runner/config.go +type Config struct { + // ... existing fields ... + + // Logging + LogLevel string + LogFormat string + GraylogHost string + GraylogApp string + LogToFile bool + LogFilePath string + + logger zerolog.Logger +} +``` + +#### Step 4: Update cmd/run.go with Flags +```go +runCmd.Flags().StringVar(&opts.LogLevel, "log-level", "info", + "Log level: debug, info, warn, error") +runCmd.Flags().StringVar(&opts.LogFormat, "log-format", "console", + "Log format: json, console") +runCmd.Flags().StringVar(&opts.GraylogHost, "graylog-host", "", + "Graylog server host:port (e.g., graylog.example.com:12201)") +runCmd.Flags().StringVar(&opts.GraylogApp, "graylog-app", "subzy", + "Application name for Graylog") +runCmd.Flags().BoolVar(&opts.LogToFile, "log-file", false, + "Enable logging to file") +runCmd.Flags().StringVar(&opts.LogFilePath, "log-file-path", "subzy.log", + "Log file path") +``` + +#### Step 5: Integrate Logging Throughout Application +```go +// Example usage in process.go +logger.Info(). + Int("subdomain_count", len(subdomains)). + Int("fingerprint_count", len(fingerprints)). + Int("concurrency", config.Concurrency). + Msg("Starting subdomain scan") + +logger.Debug(). + Str("subdomain", subdomain). + Msg("Checking subdomain") + +logger.Warn(). + Str("subdomain", subdomain). + Err(err). + Msg("HTTP request failed") + +logger.Error(). + Str("subdomain", subdomain). + Str("engine", result.entry.Engine). 
+ Msg("Vulnerable subdomain detected") +``` + +**Tasks**: +- [ ] Add zerolog and go-gelf dependencies +- [ ] Create `runner/logger.go` with initialization +- [ ] Add logging config fields to `Config` struct +- [ ] Add CLI flags for logging configuration +- [ ] Replace all `fmt.Println` with structured logging +- [ ] Add contextual logging (subdomain, engine, status) +- [ ] Create Graylog dashboard examples +- [ ] Document logging setup in README +- [ ] Add logging tests + +### 4.2 Progress Indicators + +**Library**: github.com/schollz/progressbar/v3 + +**Tasks**: +- [ ] Add progressbar dependency +- [ ] Create progress bar for subdomain scanning +- [ ] Show: `[████████░░] 80/100 subdomains | 5 vulnerable | 2.5/s` +- [ ] Add `--no-progress` flag to disable +- [ ] Ensure progress bar works with logging +- [ ] Update when writing to JSON + +### 4.3 Retry Logic with Exponential Backoff + +**Implementation**: +```go +// File: runner/retry.go +package runner + +import ( + "context" + "math" + "time" +) + +type RetryConfig struct { + MaxRetries int + InitialBackoff time.Duration + MaxBackoff time.Duration + Multiplier float64 +} + +func (c *Config) checkSubdomainWithRetry(ctx context.Context, subdomain string) Result { + var result Result + backoff := c.RetryConfig.InitialBackoff + + for attempt := 0; attempt <= c.RetryConfig.MaxRetries; attempt++ { + result = c.checkSubdomain(subdomain) + + if result.resStatus != ResultHTTPError { + return result // Success or non-retryable error + } + + if attempt < c.RetryConfig.MaxRetries { + c.logger.Debug(). + Str("subdomain", subdomain). + Int("attempt", attempt+1). + Dur("backoff", backoff). 
+ Msg("Retrying after error") + + select { + case <-time.After(backoff): + backoff = time.Duration(float64(backoff) * c.RetryConfig.Multiplier) + if backoff > c.RetryConfig.MaxBackoff { + backoff = c.RetryConfig.MaxBackoff + } + case <-ctx.Done(): + return result // Context cancelled + } + } + } + + return result +} +``` + +**Tasks**: +- [ ] Create `runner/retry.go` +- [ ] Add `RetryConfig` to main `Config` +- [ ] Add CLI flags: `--max-retries`, `--retry-backoff` +- [ ] Update `checkSubdomain` to use retry logic +- [ ] Add retry metrics (attempts, backoff times) +- [ ] Test retry behavior +- [ ] Default: 3 retries, 1s initial, 10s max, 2x multiplier + +### 4.4 Context Support for Cancellation + +**Tasks**: +- [ ] Update `Process()` signature: `func Process(ctx context.Context, config *Config) error` +- [ ] Propagate context to all workers +- [ ] Listen for `ctx.Done()` in worker loops +- [ ] Handle Ctrl+C gracefully (SIGINT, SIGTERM) +- [ ] Add `--timeout-total` flag for max scan duration +- [ ] Clean up resources on cancellation +- [ ] Save partial results before exit + +**Example**: +```go +func main() { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + // Handle signals + sigCh := make(chan os.Signal, 1) + signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM) + go func() { + <-sigCh + cancel() + }() + + if err := runner.Process(ctx, &opts); err != nil { + // ... 
+ } +} +``` + +### 4.5 Secure TLS Defaults + +**Current Issue**: Defaults to `InsecureSkipVerify: true` + +**Tasks**: +- [ ] Change default: `VerifySSL: true` +- [ ] Update flag description: `--insecure-ssl` to explicitly skip verification +- [ ] Add warning when using `--insecure-ssl` +- [ ] Add certificate validation logging +- [ ] Support custom CA certificates +- [ ] Add `--ca-cert` flag for custom root CAs + +### 4.6 Rate Limiting + +**Library**: golang.org/x/time/rate + +**Implementation**: +```go +// File: runner/ratelimit.go +package runner + +import ( + "context" + "golang.org/x/time/rate" +) + +type RateLimiter struct { + limiter *rate.Limiter +} + +func NewRateLimiter(requestsPerSecond float64) *RateLimiter { + return &RateLimiter{ + limiter: rate.NewLimiter(rate.Limit(requestsPerSecond), 1), + } +} + +func (rl *RateLimiter) Wait(ctx context.Context) error { + return rl.limiter.Wait(ctx) +} +``` + +**Tasks**: +- [ ] Add rate limiting library +- [ ] Create `runner/ratelimit.go` +- [ ] Add `--rate-limit` flag (requests/second) +- [ ] Integrate rate limiter in worker loop +- [ ] Add per-domain rate limiting option +- [ ] Add rate limit bypass flag for testing +- [ ] Default: No limit (backwards compatible) + +#### Estimated Time: 2 weeks + +--- + +## Phase 5: Feature Enhancements (Week 8-10) ⭐ + +### Status: 20% Complete + +#### ✅ Completed +- [x] Add configurable User-Agent + +#### 📋 TODO + +### 5.1 DNS Checking Before HTTP + +**Purpose**: Reduce false positives by checking DNS records first + +**Implementation**: +```go +// File: runner/dns.go +package runner + +import ( + "context" + "net" + "time" +) + +type DNSResult struct { + HasCNAME bool + CNAME string + HasA bool + ARecords []string + IsDangling bool // CNAME exists but no A records +} + +func (c *Config) checkDNS(ctx context.Context, subdomain string) (*DNSResult, error) { + resolver := &net.Resolver{ + PreferGo: true, + Dial: func(ctx context.Context, network, address string) (net.Conn, 
error) { + d := net.Dialer{Timeout: 5 * time.Second} + return d.DialContext(ctx, network, address) + }, + } + + // Check CNAME + cname, err := resolver.LookupCNAME(ctx, subdomain) + + // Check A records + ips, err := resolver.LookupHost(ctx, subdomain) + + return &DNSResult{ + HasCNAME: cname != subdomain, + CNAME: cname, + HasA: len(ips) > 0, + ARecords: ips, + IsDangling: (cname != subdomain) && (len(ips) == 0), + }, nil +} +``` + +**Tasks**: +- [ ] Create `runner/dns.go` +- [ ] Add DNS checking before HTTP requests +- [ ] Add `--dns-check` flag (default: true) +- [ ] Add `--dns-timeout` flag +- [ ] Detect dangling CNAME records +- [ ] Add DNS results to JSON output +- [ ] Cache DNS results to avoid duplicate lookups +- [ ] Add DNS-only mode (skip HTTP) + +### 5.2 Multiple Output Formats + +**Supported Formats**: +- JSON (existing) +- CSV +- YAML +- HTML (web report) +- Markdown (for documentation) +- Plain text (simple list) + +**Implementation**: +```go +// File: runner/output.go +package runner + +type OutputFormatter interface { + Format(results []*subdomainResult) ([]byte, error) +} + +type JSONFormatter struct{} +type CSVFormatter struct{} +type YAMLFormatter struct{} +type HTMLFormatter struct{} +type MarkdownFormatter struct{} +``` + +**Tasks**: +- [ ] Create `runner/output.go` with formatter interface +- [ ] Implement JSON formatter (refactor existing) +- [ ] Implement CSV formatter +- [ ] Implement YAML formatter +- [ ] Implement HTML formatter with styling +- [ ] Implement Markdown formatter +- [ ] Add `--output-format` flag +- [ ] Auto-detect format from file extension +- [ ] Add templates for HTML/Markdown +- [ ] Support multiple output files simultaneously + +### 5.3 Statistics and Summary Reporting + +**Metrics to Track**: +- Total subdomains scanned +- Vulnerable count +- Not vulnerable count +- HTTP errors count +- Response errors count +- Scan duration +- Average response time +- Requests per second +- Success rate +- Unique engines found + 
+**Implementation**: +```go +// File: runner/stats.go +package runner + +import "time" + +type ScanStatistics struct { + TotalScanned int + Vulnerable int + NotVulnerable int + HTTPErrors int + ResponseErrors int + StartTime time.Time + EndTime time.Time + Duration time.Duration + AvgResponseTime time.Duration + RequestsPerSec float64 + SuccessRate float64 + EnginesFound map[string]int +} + +func (s *ScanStatistics) Print() { + fmt.Println("\n=== Scan Summary ===") + fmt.Printf("Total Scanned: %d\n", s.TotalScanned) + fmt.Printf("Vulnerable: %d (%.1f%%)\n", s.Vulnerable, ...) + fmt.Printf("Not Vulnerable: %d\n", s.NotVulnerable) + fmt.Printf("Errors: %d\n", s.HTTPErrors+s.ResponseErrors) + fmt.Printf("Duration: %s\n", s.Duration) + fmt.Printf("Throughput: %.2f req/s\n", s.RequestsPerSec) + // ... +} +``` + +**Tasks**: +- [ ] Create `runner/stats.go` +- [ ] Track metrics during scan +- [ ] Print summary at end of scan +- [ ] Add `--stats` flag to enable/disable +- [ ] Add statistics to JSON output +- [ ] Export stats to separate file +- [ ] Create charts/graphs for HTML output +- [ ] Real-time stats with `--live-stats` flag + +### 5.4 Plugin System for Custom Fingerprints + +**Architecture**: +``` +~/.subzy/ +├── fingerprints.json # Official fingerprints +├── custom/ +│ ├── company-services.json # Custom fingerprints +│ ├── internal-apps.json +│ └── legacy-systems.json +└── plugins/ + └── fingerprint-loader.so # Optional: Go plugins +``` + +**Implementation**: +```go +// File: runner/plugins.go +package runner + +type FingerprintSource interface { + Load() ([]Fingerprint, error) + Name() string +} + +type LocalFileSource struct { + Path string +} + +type RemoteURLSource struct { + URL string +} + +type PluginManager struct { + sources []FingerprintSource +} + +func (pm *PluginManager) LoadAllFingerprints() ([]Fingerprint, error) { + var all []Fingerprint + for _, source := range pm.sources { + fps, err := source.Load() 
+ if err != nil { + log.Warn().Err(err).Str("source", source.Name()).Msg("Failed to load") + continue + } + all = append(all, fps...) + } + return all, nil +} +``` + +**Tasks**: +- [ ] Create `runner/plugins.go` +- [ ] Support loading from multiple JSON files +- [ ] Add `--custom-fingerprints` flag (comma-separated paths) +- [ ] Auto-load from `~/.subzy/custom/` directory +- [ ] Support remote fingerprint URLs +- [ ] Add fingerprint validation +- [ ] Merge duplicate fingerprints +- [ ] Add fingerprint priority/ordering +- [ ] Create fingerprint testing tool +- [ ] Document custom fingerprint format + +#### Estimated Time: 3 weeks + +--- + +## Phase 6: Advanced Features (Future) 🚀 + +### Status: 0% Complete (Planning Only) + +### 6.1 API Server Mode + +**Purpose**: Run Subzy as a RESTful API service + +**Tech Stack**: +- Framework: `gin-gonic/gin` or `gorilla/mux` +- Authentication: JWT tokens +- Rate limiting: Per API key +- Database: PostgreSQL or SQLite for results + +**Endpoints**: +``` +POST /api/v1/scans - Create new scan +GET /api/v1/scans/:id - Get scan status +GET /api/v1/scans/:id/results - Get scan results +GET /api/v1/scans - List all scans +DELETE /api/v1/scans/:id - Delete scan +GET /api/v1/health - Health check +GET /api/v1/metrics - Prometheus metrics +``` + +**Tasks**: +- [ ] Design API specification (OpenAPI/Swagger) +- [ ] Create `server/` package +- [ ] Implement HTTP server with Gin +- [ ] Add JWT authentication +- [ ] Implement scan queue with workers +- [ ] Add database layer for persistence +- [ ] Add WebSocket for real-time updates +- [ ] Add API rate limiting +- [ ] Create Swagger documentation +- [ ] Add Prometheus metrics endpoint +- [ ] Docker containerization +- [ ] Kubernetes deployment manifests + +### 6.2 Web Dashboard + +**Purpose**: Web UI for managing scans and viewing results + +**Tech Stack**: +- Frontend: React + TypeScript +- UI Library: Material-UI or Tailwind CSS +- Charts: Recharts or Chart.js +- State: Redux or Zustand 
+- Build: Vite + +**Features**: +- Dashboard with scan statistics +- Submit new scans +- View scan results in table +- Filter and search results +- Export results (CSV, JSON, PDF) +- User management +- Scan scheduling +- Historical trends +- Vulnerability timeline + +**Tasks**: +- [ ] Create `web/` directory +- [ ] Set up React + TypeScript project +- [ ] Design UI mockups +- [ ] Implement dashboard view +- [ ] Implement scan submission form +- [ ] Implement results table with filters +- [ ] Add charts and visualizations +- [ ] Integrate with API backend +- [ ] Add authentication flow +- [ ] Responsive design +- [ ] Dark mode support +- [ ] Build and embed into Go binary + +### 6.3 Historical Tracking + +**Purpose**: Track vulnerability changes over time + +**Database Schema**: +```sql +CREATE TABLE scans ( + id UUID PRIMARY KEY, + created_at TIMESTAMP, + target_count INT, + vulnerable_count INT, + duration_ms INT, + status VARCHAR(20) +); + +CREATE TABLE scan_results ( + id UUID PRIMARY KEY, + scan_id UUID REFERENCES scans(id), + subdomain VARCHAR(255), + status VARCHAR(50), + engine VARCHAR(100), + created_at TIMESTAMP +); + +CREATE TABLE vulnerability_history ( + subdomain VARCHAR(255), + engine VARCHAR(100), + first_seen TIMESTAMP, + last_seen TIMESTAMP, + scan_count INT, + PRIMARY KEY (subdomain, engine) +); +``` + +**Features**: +- Store all scan results +- Track when vulnerabilities first appear +- Track when vulnerabilities are fixed +- Generate trend reports +- Alert on new vulnerabilities +- Compare scans over time + +**Tasks**: +- [ ] Choose database (PostgreSQL recommended) +- [ ] Create database schema +- [ ] Add database migrations +- [ ] Implement data access layer +- [ ] Store scan results automatically +- [ ] Add `--save-history` flag +- [ ] Query historical data +- [ ] Generate trend reports +- [ ] Add `subzy history` command +- [ ] Export historical data + +### 6.4 Notification Integrations + +**Supported Channels**: +- Slack +- Discord +- 
Microsoft Teams +- Email (SMTP) +- PagerDuty +- Webhooks (generic) +- Telegram + +**Implementation**: +```go +// File: runner/notifications.go +package runner + +type Notifier interface { + Notify(result *NotificationPayload) error +} + +type NotificationPayload struct { + Severity string + Subdomain string + Engine string + Description string + Timestamp time.Time +} + +type SlackNotifier struct { + WebhookURL string +} + +type DiscordNotifier struct { + WebhookURL string +} + +// ... etc +``` + +**Tasks**: +- [ ] Create `runner/notifications.go` +- [ ] Implement Slack notifier +- [ ] Implement Discord notifier +- [ ] Implement Email notifier +- [ ] Implement webhook notifier +- [ ] Add notification config file +- [ ] Add `--notify-on` flag (vulnerable, error, all) +- [ ] Template system for messages +- [ ] Batch notifications +- [ ] Test all notification channels + +### 6.5 ML-Based Detection + +**Purpose**: Use machine learning to improve detection accuracy + +**Approach**: +1. **Feature Engineering**: + - Response body length + - Response headers + - Status code + - Response time + - HTML structure + - JavaScript presence + - CSS patterns + - DNS records + +2. **Model Training**: + - Collect labeled dataset (vulnerable vs not) + - Train classifier (Random Forest, XGBoost, or Neural Network) + - Validate on test set + - Deploy model + +3. 
**Inference**: + - Extract features from response + - Run model prediction + - Combine with fingerprint matching + - Confidence scoring + +**Tasks**: +- [ ] Research existing takeover datasets +- [ ] Collect training data +- [ ] Feature extraction implementation +- [ ] Model training pipeline (Python + scikit-learn) +- [ ] Model export (ONNX or TensorFlow Lite) +- [ ] Go ML inference library integration +- [ ] Confidence scoring system +- [ ] False positive learning +- [ ] Model versioning and updates +- [ ] A/B testing framework + +#### Estimated Time: 12+ weeks (long-term roadmap) + +--- + +## Implementation Priority Matrix + +| Feature | Priority | Complexity | Impact | Est. Time | +|---------|----------|------------|--------|-----------| +| Pre-commit hooks | High | Low | Medium | 2 days | +| Test coverage to 70% | High | Medium | High | 1 week | +| Structured logging + Graylog | Critical | Medium | High | 1 week | +| Fingerprint optimization | Critical | High | Very High | 1 week | +| Context cancellation | High | Medium | Medium | 3 days | +| Progress indicators | Medium | Low | Medium | 2 days | +| Retry logic | High | Medium | High | 3 days | +| Rate limiting | High | Medium | High | 3 days | +| DNS checking | Medium | Medium | High | 1 week | +| Statistics reporting | Medium | Low | Medium | 3 days | +| Multiple output formats | Medium | Medium | Medium | 1 week | +| Secure TLS defaults | High | Low | High | 2 days | +| Plugin system | Low | High | Medium | 2 weeks | +| API server mode | Low | Very High | Medium | 4+ weeks | +| Web dashboard | Low | Very High | Medium | 6+ weeks | +| Historical tracking | Low | High | Medium | 3 weeks | +| Notifications | Low | Medium | Medium | 2 weeks | +| ML detection | Low | Very High | High | 12+ weeks | + +--- + +## Quick Start Roadmap (Next 4 Weeks) + +### Week 1 +- [ ] Pre-commit hooks setup +- [ ] Increase test coverage to 50% +- [ ] Start structured logging implementation + +### Week 2 +- [ ] Complete structured 
logging + Graylog +- [ ] Implement context cancellation +- [ ] Add progress indicators +- [ ] Increase test coverage to 70% + +### Week 3 +- [ ] Fingerprint matching optimization (Aho-Corasick) +- [ ] Retry logic with exponential backoff +- [ ] Rate limiting + +### Week 4 +- [ ] DNS checking +- [ ] Secure TLS defaults +- [ ] Statistics reporting +- [ ] Performance benchmarking + +--- + +## Success Metrics + +### Phase 2-3 (Testing & Performance) +- ✅ Test coverage ≥ 70% +- ✅ All CI checks passing +- ✅ Pre-commit hooks functional +- ✅ 5-10x faster fingerprint matching +- ✅ Memory usage stable under load + +### Phase 4 (Quality & Security) +- ✅ Structured logging operational +- ✅ Graylog integration working +- ✅ Retry logic reduces transient failures by 80%+ +- ✅ Graceful cancellation works +- ✅ TLS verification enabled by default + +### Phase 5 (Features) +- ✅ DNS checking reduces false positives +- ✅ 5+ output formats supported +- ✅ Statistics provide actionable insights +- ✅ Plugin system allows custom fingerprints + +### Phase 6 (Advanced) +- ✅ API server handles 100+ concurrent scans +- ✅ Web dashboard fully functional +- ✅ Historical tracking operational +- ✅ Notifications working for all channels +- ✅ ML model improves accuracy by 10%+ + +--- + +## Dependencies to Add + +```bash +# Phase 4: Logging +go get -u github.com/rs/zerolog +go get -u gopkg.in/Graylog2/go-gelf.v2/gelf + +# Phase 4: Progress +go get -u github.com/schollz/progressbar/v3 + +# Phase 4: Rate Limiting +go get -u golang.org/x/time/rate + +# Phase 3: Fingerprint Matching +go get -u github.com/cloudflare/ahocorasick +# OR +go get -u github.com/anknown/ahocorasick + +# Phase 5: Output Formats +go get -u gopkg.in/yaml.v3 +go get -u github.com/olekukonko/tablewriter + +# Phase 6: API Server +go get -u github.com/gin-gonic/gin +go get -u github.com/golang-jwt/jwt/v5 +go get -u gorm.io/gorm +go get -u gorm.io/driver/postgres + +# Phase 6: ML +go get -u
github.com/owulveryck/onnx-go +``` + +--- + +## Documentation Updates Needed + +- [ ] README: Add Graylog setup instructions +- [ ] README: Add pre-commit hooks setup +- [ ] README: Add all new CLI flags +- [ ] CONTRIBUTING.md: Add testing guidelines +- [ ] CONTRIBUTING.md: Add logging guidelines +- [ ] ARCHITECTURE.md: New document explaining design +- [ ] API.md: API documentation (Phase 6) +- [ ] DEPLOYMENT.md: Docker/Kubernetes guide (Phase 6) + +--- + +## Notes + +- All phases build incrementally on previous work +- Phases 2-4 should be completed before Phase 5 +- Phase 6 is long-term and can be done in parallel once core is stable +- Backward compatibility must be maintained +- All new features should have tests +- All breaking changes require major version bump + +--- + +**Last Updated**: 2025-11-10 +**Next Review**: Every 2 weeks during active development diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..cff4eae --- /dev/null +++ b/Makefile @@ -0,0 +1,107 @@ +.PHONY: help build test test-verbose test-race test-coverage bench lint clean install run + +# Default target +help: + @echo "Subzy - Subdomain Takeover Detection Tool" + @echo "" + @echo "Available targets:" + @echo " build - Build the binary" + @echo " test - Run tests" + @echo " test-verbose - Run tests with verbose output" + @echo " test-race - Run tests with race detector" + @echo " test-coverage - Run tests with coverage report" + @echo " bench - Run benchmark tests" + @echo " lint - Run golangci-lint" + @echo " clean - Remove build artifacts" + @echo " install - Install the binary to GOPATH/bin" + @echo " run - Build and run with example targets" + +# Build the application +build: + @echo "Building subzy..." + go build -o subzy main.go + @echo "Build complete: ./subzy" + +# Run all tests +test: + @echo "Running tests..." + go test ./... + +# Run tests with verbose output +test-verbose: + @echo "Running tests (verbose)..." + go test -v ./... 
+ +# Run tests with race detector +test-race: + @echo "Running tests with race detector..." + go test -race ./... + +# Run tests with coverage +test-coverage: + @echo "Running tests with coverage..." + go test -cover ./... + @echo "" + @echo "Generating detailed coverage report..." + go test -coverprofile=coverage.out ./... + go tool cover -html=coverage.out -o coverage.html + @echo "Coverage report generated: coverage.html" + +# Run benchmark tests +bench: + @echo "Running benchmarks..." + go test -bench=. -benchmem ./runner + +# Run linter +lint: + @echo "Running golangci-lint..." + @which golangci-lint > /dev/null || (echo "golangci-lint not installed. Install from https://golangci-lint.run/usage/install/" && exit 1) + golangci-lint run + +# Clean build artifacts +clean: + @echo "Cleaning build artifacts..." + rm -f subzy + rm -f coverage.out coverage.html + @echo "Clean complete" + +# Install to GOPATH/bin +install: + @echo "Installing subzy..." + go install -v + +# Example run +run: build + @echo "Running subzy with example targets..." + @echo "Note: Create list.txt with test domains first" + ./subzy run --help + +# Development helpers +fmt: + @echo "Formatting code..." + go fmt ./... + +vet: + @echo "Running go vet..." + go vet ./... + +# Run all checks (fmt, vet, test, lint) +check: fmt vet test lint + @echo "All checks passed!" + +# Update dependencies +deps-update: + @echo "Updating dependencies..." + go get -u ./... + go mod tidy + +# Show dependency graph +deps-graph: + @echo "Dependency graph:" + go mod graph + +# Check for security vulnerabilities +security: + @echo "Checking for security vulnerabilities..." + @which govulncheck > /dev/null || go install golang.org/x/vuln/cmd/govulncheck@latest + govulncheck ./... diff --git a/NEXT_STEPS.md b/NEXT_STEPS.md new file mode 100644 index 0000000..14f5bd5 --- /dev/null +++ b/NEXT_STEPS.md @@ -0,0 +1,344 @@ +# Next Steps - Quick Start Guide + +## Immediate Actions (This Week) + +### 1. 
Pre-commit Hooks Setup (2 hours) + +```bash +# Install pre-commit +pip install pre-commit + +# Create .pre-commit-config.yaml +cat > .pre-commit-config.yaml << 'EOF' +repos: + - repo: local + hooks: + - id: go-fmt + name: Go Format + entry: gofmt -w + language: system + files: \.go$ + + - id: go-vet + name: Go Vet + entry: go vet ./... + language: system + pass_filenames: false + + - id: golangci-lint + name: golangci-lint + entry: golangci-lint run + language: system + pass_filenames: false + + - id: go-test-short + name: Go Test (short) + entry: go test -short ./... + language: system + pass_filenames: false + + - id: go-mod-tidy + name: Go Mod Tidy + entry: go mod tidy + language: system + pass_filenames: false +EOF + +# Install hooks +pre-commit install + +# Test hooks +pre-commit run --all-files +``` + +### 2. Structured Logging with Graylog (1 week) + +**Day 1-2: Setup Dependencies** +```bash +go get -u github.com/rs/zerolog +go get -u github.com/rs/zerolog/log +go get -u gopkg.in/Graylog2/go-gelf.v2/gelf +``` + +**Day 3-4: Implement Logger** +- Create `runner/logger.go` (see IMPLEMENTATION_PLAN.md) +- Add config fields and CLI flags +- Initialize logger in main.go + +**Day 5-7: Replace fmt.Println** +- Update all `fmt.Println` → `logger.Info()` +- Update all `fmt.Printf` → `logger.Debug()` +- Add structured fields (subdomain, engine, status) +- Test Graylog integration + +**Testing Graylog Locally:** +```bash +# Docker Compose for local Graylog +docker-compose up -d + +# Test with subzy +./subzy run \ + --targets list.txt \ + --log-level debug \ + --log-format json \ + --graylog-host localhost:12201 \ + --graylog-app subzy-test +``` + +### 3. Increase Test Coverage to 70% (1 week) + +**Priority Files:** +1. `process.go` - Add concurrency tests +2. `download.go` - Add HTTP download tests +3. `cmd/run.go` - Add command tests +4. Integration tests - End-to-end workflows + +**Commands:** +```bash +# Check current coverage +go test -cover ./...
+ +# Generate coverage report +go test -coverprofile=coverage.out ./... +go tool cover -html=coverage.out -o coverage.html +open coverage.html + +# Find untested code +go test -coverprofile=coverage.out ./... +go tool cover -func=coverage.out | grep -v "100.0%" +``` + +### 4. Fingerprint Matching Optimization (1 week) + +**Using Aho-Corasick:** +```bash +# Add dependency +go get -u github.com/cloudflare/ahocorasick + +# Create runner/matcher.go +# See IMPLEMENTATION_PLAN.md for full implementation + +# Benchmark comparison +go test -bench=BenchmarkMatch -benchmem ./runner +``` + +**Expected Results:** +- Before: ~100 µs per match (44 fingerprints) +- After: ~10-20 µs per match (5-10x faster) + +--- + +## CLI Enhancements + +### New Flags to Add + +```go +// Logging +--log-level=info // debug, info, warn, error +--log-format=console // console, json +--graylog-host= // host:port +--graylog-app=subzy // app name +--log-file // enable file logging +--log-file-path=subzy.log // log file path + +// Retry & Resilience +--max-retries=3 // retry attempts +--retry-backoff=1s // initial backoff +--timeout-total=5m // max scan duration + +// Rate Limiting +--rate-limit=0 // requests/second (0=unlimited) + +// DNS +--dns-check // check DNS before HTTP +--dns-timeout=5s // DNS timeout +--dns-only // DNS-only mode + +// Output +--output-format=json // json, csv, yaml, html, md +--stats // print statistics +--no-progress // disable progress bar + +// Security +--insecure-ssl // skip SSL verification (default: verify) +--ca-cert= // custom CA certificate + +// Advanced +--profile-cpu=cpu.pprof // CPU profiling +--profile-mem=mem.pprof // Memory profiling +--custom-fingerprints= // custom fingerprint files +``` + +--- + +## Testing Checklist + +### Unit Tests +- [ ] `helpers_test.go` - 100% coverage ✅ +- [ ] `worker_test.go` - 90% coverage ✅ +- [ ] `fingerprints_test.go` - 95% coverage ✅ +- [ ] `reader_test.go` - 95% coverage ✅ +- [ ] `config_test.go` - 85% coverage ✅
+- [ ] `process_test.go` - Need to add +- [ ] `download_test.go` - Need to add +- [ ] `logger_test.go` - Need to add +- [ ] `matcher_test.go` - Need to add + +### Integration Tests +- [ ] End-to-end scan workflow +- [ ] Concurrent processing +- [ ] Error handling +- [ ] Output generation +- [ ] Context cancellation + +### Benchmark Tests +- [ ] Fingerprint matching +- [ ] Concurrent processing +- [ ] Memory usage +- [ ] Throughput + +--- + +## Graylog Dashboard Setup + +### 1. Install Graylog (Docker) + +```yaml +# docker-compose.yml +version: '3' +services: + mongodb: + image: mongo:5.0 + + elasticsearch: + image: docker.elastic.co/elasticsearch/elasticsearch:7.17.9 + environment: + - "discovery.type=single-node" + - "ES_JAVA_OPTS=-Xms512m -Xmx512m" + + graylog: + image: graylog/graylog:5.0 + environment: + - GRAYLOG_HTTP_EXTERNAL_URI=http://127.0.0.1:9000/ + - GRAYLOG_PASSWORD_SECRET=somepasswordpepper + - GRAYLOG_ROOT_PASSWORD_SHA2=8c6976e5b5410415bde908bd4dee15dfb167a9c873fc4bb8a81f6f2ab448a918 + ports: + - "9000:9000" # Web interface + - "12201:12201/udp" # GELF UDP + depends_on: + - mongodb + - elasticsearch +``` + +```bash +docker-compose up -d +``` + +### 2. Configure Input + +1. Open http://localhost:9000 +2. Login: admin / admin +3. System → Inputs → Select "GELF UDP" → Launch new input +4. Port: 12201 +5. Save + +### 3.
Create Dashboards + +**Scan Overview:** +- Total scans +- Vulnerable subdomains +- Scan duration trends +- Error rate + +**Vulnerability Tracking:** +- Vulnerabilities by engine +- New vulnerabilities over time +- Top vulnerable domains + +**Performance Metrics:** +- Requests per second +- Average response time +- Error breakdown + +--- + +## Performance Targets + +| Metric | Current | Target | Method | +|--------|---------|--------|--------| +| Test Coverage | 35.7% | 70%+ | Add unit & integration tests | +| Fingerprint Match | ~100 µs | ~20 µs | Aho-Corasick algorithm | +| Throughput | ~10-100/s | 500+/s | Optimizations + concurrency | +| Memory Usage | Unbounded | <100MB | Response limits + pooling | +| Error Recovery | 0% | 80%+ | Retry logic | + +--- + +## Development Workflow + +### Before Committing +```bash +# Format code +go fmt ./... + +# Run linter +golangci-lint run + +# Run tests +go test -race -cover ./... + +# Run benchmarks +go test -bench=. -benchmem ./runner + +# Build +go build -o subzy main.go +``` + +### During Development +```bash +# Watch tests +go install github.com/cespare/reflex@latest +reflex -r '\.go$' -s -- sh -c 'go test ./...'
+ +# Live reload +go install github.com/cosmtrek/air@latest +air +``` + +### Code Review Checklist +- [ ] Tests added/updated +- [ ] Documentation updated +- [ ] CHANGELOG.md updated +- [ ] No breaking changes (or documented) +- [ ] Linter passes +- [ ] Coverage maintained/improved +- [ ] Benchmarks show no regression + +--- + +## Resources + +### Documentation +- [Zerolog Guide](https://github.com/rs/zerolog) +- [Graylog Docs](https://docs.graylog.org/) +- [Aho-Corasick Algorithm](https://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_algorithm) +- [Go Testing Best Practices](https://go.dev/doc/tutorial/add-a-test) + +### Tools +- [golangci-lint](https://golangci-lint.run/) +- [pre-commit](https://pre-commit.com/) +- [gopls](https://pkg.go.dev/golang.org/x/tools/gopls) +- [delve](https://github.com/go-delve/delve) (debugger) + +--- + +## Questions? + +See detailed implementation in `IMPLEMENTATION_PLAN.md` + +For phase-by-phase breakdown, refer to sections: +- Phase 2: Testing Foundation +- Phase 3: Performance Optimizations +- Phase 4: Quality & Security (Graylog integration) +- Phase 5: Feature Enhancements +- Phase 6: Advanced Features diff --git a/cmd/root.go b/cmd/root.go index 54945e1..b35aa0c 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -10,6 +10,6 @@ var rootCmd = &cobra.Command{ }, } -func Execute() { - rootCmd.Execute() +func Execute() error { + return rootCmd.Execute() } diff --git a/cmd/run.go b/cmd/run.go index 1dcf3ac..6cc6770 100644 --- a/cmd/run.go +++ b/cmd/run.go @@ -16,6 +16,29 @@ var runCmd = &cobra.Command{ Short: "Run subzy", Aliases: []string{"r"}, RunE: func(cmd *cobra.Command, args []string) error { + // Input validation + if opts.Target == "" && opts.Targets == "" { + return fmt.Errorf("either --target or --targets must be specified") + } + + if opts.Target != "" && opts.Targets != "" { + return fmt.Errorf("cannot specify both --target and --targets") + } + + if opts.Targets != "" { + if _, err := os.Stat(opts.Targets); err != nil { + return 
fmt.Errorf("targets file does not exist: %v", err) + } + } + + if opts.Concurrency <= 0 { + return fmt.Errorf("concurrency must be greater than 0") + } + + if opts.Timeout <= 0 { + return fmt.Errorf("timeout must be greater than 0") + } + fingerprintsPath, err := runner.GetFingerprintPath() if err != nil { return err @@ -36,14 +59,31 @@ var runCmd = &cobra.Command{ } func init() { + // Target configuration runCmd.Flags().StringVar(&opts.Target, "target", "", "Comma separated list of domains") runCmd.Flags().StringVar(&opts.Targets, "targets", "", "File containing the list of subdomains") runCmd.Flags().StringVar(&opts.Output, "output", "", "JSON output filename") + + // HTTP configuration + runCmd.Flags().StringVar(&opts.UserAgent, "user-agent", "", "Custom User-Agent string (default: Subzy/1.1.0)") runCmd.Flags().BoolVar(&opts.HTTPS, "https", false, "Force https protocol if not no protocol defined for target (default false)") runCmd.Flags().BoolVar(&opts.VerifySSL, "verify_ssl", false, "If set to true it won't check sites with insecure SSL and return HTTP Error") + runCmd.Flags().IntVar(&opts.Timeout, "timeout", 10, "Request timeout in seconds") + + // Output configuration runCmd.Flags().BoolVar(&opts.HideFails, "hide_fails", false, "Don't display failed results") runCmd.Flags().BoolVar(&opts.OnlyVuln, "vuln", false, "Save only vulnerable subdomains") + + // Performance configuration runCmd.Flags().IntVar(&opts.Concurrency, "concurrency", 10, "Number of concurrent checks") - runCmd.Flags().IntVar(&opts.Timeout, "timeout", 10, "Request timeout in seconds") + + // Logging configuration + runCmd.Flags().StringVar(&opts.LogLevel, "log-level", "info", "Log level: debug, info, warn, error") + runCmd.Flags().StringVar(&opts.LogFormat, "log-format", "console", "Log format: console, json") + runCmd.Flags().StringVar(&opts.GraylogHost, "graylog-host", "", "Graylog server (e.g., graylog.example.com:12201)") + runCmd.Flags().StringVar(&opts.GraylogApp, "graylog-app", "subzy", 
"Application name for Graylog") + runCmd.Flags().BoolVar(&opts.LogToFile, "log-file", false, "Enable logging to file") + runCmd.Flags().StringVar(&opts.LogFilePath, "log-file-path", "subzy.log", "Log file path") + rootCmd.AddCommand(runCmd) } diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..9c024fb --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,67 @@ +# Docker Compose for local Graylog testing +# Usage: docker-compose up -d +# Access Graylog at: http://localhost:9000 (admin/admin) +# GELF UDP input: localhost:12201 + +version: '3' + +services: + # MongoDB for Graylog metadata + mongodb: + image: mongo:5.0 + container_name: subzy-mongodb + volumes: + - mongo_data:/data/db + networks: + - graylog + + # Elasticsearch for log storage + elasticsearch: + image: docker.elastic.co/elasticsearch/elasticsearch:7.17.9 + container_name: subzy-elasticsearch + environment: + - "discovery.type=single-node" + - "ES_JAVA_OPTS=-Xms512m -Xmx512m" + - "xpack.security.enabled=false" + volumes: + - es_data:/usr/share/elasticsearch/data + networks: + - graylog + + # Graylog server + graylog: + image: graylog/graylog:5.0 + container_name: subzy-graylog + environment: + # CHANGE THIS SECRET! 
+ - GRAYLOG_PASSWORD_SECRET=somepasswordpepper + # Password: admin (SHA-256 hash) + - GRAYLOG_ROOT_PASSWORD_SHA2=8c6976e5b5410415bde908bd4dee15dfb167a9c873fc4bb8a81f6f2ab448a918 + - GRAYLOG_HTTP_EXTERNAL_URI=http://127.0.0.1:9000/ + - GRAYLOG_MONGODB_URI=mongodb://mongodb:27017/graylog + - GRAYLOG_ELASTICSEARCH_HOSTS=http://elasticsearch:9200 + depends_on: + - mongodb + - elasticsearch + ports: + - "9000:9000" # Web interface + - "12201:12201/udp" # GELF UDP + - "1514:1514" # Syslog TCP + - "1514:1514/udp" # Syslog UDP + - "12201:12201" # GELF TCP + volumes: + - graylog_data:/usr/share/graylog/data + networks: + - graylog + +networks: + graylog: + driver: bridge + +volumes: + mongo_data: + driver: local + es_data: + driver: local + graylog_data: + driver: local diff --git a/go.mod b/go.mod index 8f56211..866bc58 100644 --- a/go.mod +++ b/go.mod @@ -1,14 +1,21 @@ module github.com/LukaSikic/subzy -go 1.19 +go 1.24.0 + +toolchain go1.24.7 require ( github.com/logrusorgru/aurora v2.0.3+incompatible github.com/mitchellh/go-homedir v1.1.0 - github.com/spf13/cobra v1.6.1 + github.com/spf13/cobra v1.10.1 ) require ( - github.com/inconshreveable/mousetrap v1.0.1 // indirect - github.com/spf13/pflag v1.0.5 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/mattn/go-colorable v0.1.14 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/rs/zerolog v1.34.0 // indirect + github.com/spf13/pflag v1.0.10 // indirect + golang.org/x/sys v0.38.0 // indirect + gopkg.in/Graylog2/go-gelf.v2 v2.0.0-20191017102106-1550ee647df0 // indirect ) diff --git a/go.sum b/go.sum index ae59d86..1f826e6 100644 --- a/go.sum +++ b/go.sum @@ -1,14 +1,35 @@ -github.com/cpuguy83/go-md2man/v2 v2.0.2/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= -github.com/inconshreveable/mousetrap v1.0.1 h1:U3uMjPSQEBMNp1lFxmllqCPM6P5u/Xq7Pgzkat/bFNc= -github.com/inconshreveable/mousetrap v1.0.1/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= 
+github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= +github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= +github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/logrusorgru/aurora v2.0.3+incompatible h1:tOpm7WcpBTn4fjmVfgpQq0EfczGlG91VSDkswnjF5A8= github.com/logrusorgru/aurora v2.0.3+incompatible/go.mod h1:7rIyQOR62GCctdiQpZ/zOJlFyk6y+94wXzv6RNZgaR4= +github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= +github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE= +github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8= +github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0= +github.com/rs/zerolog v1.34.0 h1:k43nTLIwcTVQAncfCw4KZ2VY6ukYoZaBPNOE8txlOeY= +github.com/rs/zerolog v1.34.0/go.mod h1:bJsvje4Z08ROH4Nhs5iH600c3IkWhwp44iRc54W6wYQ= github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= -github.com/spf13/cobra v1.6.1 h1:o94oiPyS4KD1mPy2fmcYYHHfCxLqYjJOhGsCHFZtEzA= 
-github.com/spf13/cobra v1.6.1/go.mod h1:IOw/AERYS7UzyrGinqmz6HLUo219MORXGxhbaJUqzrY= -github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= -github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/cobra v1.10.1 h1:lJeBwCfmrnXthfAupyUTzJ/J4Nc1RsHC/mSRU2dll/s= +github.com/spf13/cobra v1.10.1/go.mod h1:7SmJGaTHFVBY0jW4NXGluQoLvhqFQM+6XSKD+P4XaB0= +github.com/spf13/pflag v1.0.9/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= +github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= +golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +gopkg.in/Graylog2/go-gelf.v2 v2.0.0-20191017102106-1550ee647df0 h1:Xg23ydYYJLmb9AK3XdcEpplHZd1MpN3X2ZeeMoBClmY= +gopkg.in/Graylog2/go-gelf.v2 v2.0.0-20191017102106-1550ee647df0/go.mod h1:CeDeqW4tj9FrgZXF/dQCWZrBdcZWWBenhJtxLH4On2g= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/main.go b/main.go index 2bab974..b8b7930 100644 --- a/main.go +++ b/main.go @@ -1,7 +1,15 @@ package main -import "github.com/LukaSikic/subzy/cmd" +import ( + "fmt" + "os" + + "github.com/LukaSikic/subzy/cmd" +) func main() { - cmd.Execute() + if err := cmd.Execute(); err != nil { + fmt.Fprintf(os.Stderr, "Error: %v\n", err) + os.Exit(1) + } } diff --git a/runner/benchmarks_test.go b/runner/benchmarks_test.go new file mode 100644 index 0000000..8823865 --- /dev/null 
+++ b/runner/benchmarks_test.go @@ -0,0 +1,137 @@ +package runner + +import ( + "testing" +) + +func BenchmarkMatchResponse(b *testing.B) { + config := &Config{ + fingerprints: []Fingerprint{ + { + Engine: "Service1", + Fingerprint: "error-message-1", + FalsePositive: []string{}, + }, + { + Engine: "Service2", + Fingerprint: "error-message-2", + FalsePositive: []string{"false-positive"}, + }, + { + Engine: "Service3", + Fingerprint: "error-message-3", + FalsePositive: []string{}, + }, + }, + } + + body := "This is a test response body that contains error-message-2 and some other text to make it realistic" + + b.ResetTimer() + for i := 0; i < b.N; i++ { + config.matchResponse(body) + } +} + +func BenchmarkMatchResponseNoMatch(b *testing.B) { + config := &Config{ + fingerprints: []Fingerprint{ + { + Engine: "Service1", + Fingerprint: "error-message-1", + FalsePositive: []string{}, + }, + { + Engine: "Service2", + Fingerprint: "error-message-2", + FalsePositive: []string{}, + }, + }, + } + + body := "This is a test response body with no matching fingerprints" + + b.ResetTimer() + for i := 0; i < b.N; i++ { + config.matchResponse(body) + } +} + +func BenchmarkMatchResponseWithFalsePositive(b *testing.B) { + config := &Config{ + fingerprints: []Fingerprint{ + { + Engine: "Service1", + Fingerprint: "error-occurred", + FalsePositive: []string{"but-its-ok", "no-worries", "all-good"}, + }, + }, + } + + body := "error-occurred but-its-ok so everything is fine" + + b.ResetTimer() + for i := 0; i < b.N; i++ { + config.matchResponse(body) + } +} + +func BenchmarkMatchResponseManyFingerprints(b *testing.B) { + // Simulate realistic scenario with 44 fingerprints + fingerprints := make([]Fingerprint, 44) + for i := 0; i < 44; i++ { + fingerprints[i] = Fingerprint{ + Engine: "Service", + Fingerprint: "unique-error", + FalsePositive: []string{}, + } + } + + config := &Config{ + fingerprints: fingerprints, + } + + body := "This is a test response body with no matching fingerprints 
but lots of text to process" + + b.ResetTimer() + for i := 0; i < b.N; i++ { + config.matchResponse(body) + } +} + +func BenchmarkIsValidUrl(b *testing.B) { + urls := []string{ + "http://example.com", + "https://subdomain.example.com/path", + "example.com", + "http://example.com:8080/path?query=value", + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + for _, url := range urls { + isValidUrl(url) + } + } +} + +func BenchmarkInitHTTPClient(b *testing.B) { + config := &Config{ + Concurrency: 10, + Timeout: 10, + VerifySSL: false, + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + config.initHTTPClient() + } +} + +func BenchmarkIsEnabled(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + isEnabled(true) + isEnabled(false) + } +} diff --git a/runner/config.go b/runner/config.go index 8374aa9..6e63d74 100644 --- a/runner/config.go +++ b/runner/config.go @@ -4,6 +4,8 @@ import ( "crypto/tls" "net/http" "time" + + "github.com/rs/zerolog" ) type Config struct { @@ -17,13 +19,30 @@ type Config struct { Targets string Target string Output string + UserAgent string + RateLimit int + + // Logging configuration + LogLevel string + LogFormat string + GraylogHost string + GraylogApp string + LogToFile bool + LogFilePath string + client *http.Client fingerprints []Fingerprint + logger zerolog.Logger } func (s *Config) initHTTPClient() { + // Optimize connection pooling for concurrent requests tr := &http.Transport{ - TLSClientConfig: &tls.Config{InsecureSkipVerify: !s.VerifySSL}, + TLSClientConfig: &tls.Config{InsecureSkipVerify: !s.VerifySSL}, + MaxIdleConns: 100, + MaxIdleConnsPerHost: s.Concurrency, + IdleConnTimeout: 90 * time.Second, + DisableKeepAlives: false, } timeout := time.Duration(s.Timeout) * time.Second diff --git a/runner/config_test.go b/runner/config_test.go new file mode 100644 index 0000000..9534876 --- /dev/null +++ b/runner/config_test.go @@ -0,0 +1,173 @@ +package runner + +import ( + "testing" + "time" +) + +func TestInitHTTPClient(t 
*testing.T) { + tests := []struct { + name string + config Config + verifySetup func(*testing.T, *Config) + }{ + { + name: "default configuration", + config: Config{ + VerifySSL: false, + Concurrency: 10, + Timeout: 10, + }, + verifySetup: func(t *testing.T, c *Config) { + if c.client == nil { + t.Error("HTTP client not initialized") + } + if c.client.Timeout != 10*time.Second { + t.Errorf("Expected timeout 10s, got %v", c.client.Timeout) + } + }, + }, + { + name: "with SSL verification", + config: Config{ + VerifySSL: true, + Concurrency: 5, + Timeout: 30, + }, + verifySetup: func(t *testing.T, c *Config) { + if c.client == nil { + t.Error("HTTP client not initialized") + } + if c.client.Timeout != 30*time.Second { + t.Errorf("Expected timeout 30s, got %v", c.client.Timeout) + } + }, + }, + { + name: "high concurrency", + config: Config{ + VerifySSL: false, + Concurrency: 100, + Timeout: 5, + }, + verifySetup: func(t *testing.T, c *Config) { + if c.client == nil { + t.Error("HTTP client not initialized") + } + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + tt.config.initHTTPClient() + tt.verifySetup(t, &tt.config) + }) + } +} + +func TestLoadFingerprints(t *testing.T) { + // Save original function + originalGetPath := GetFingerprintPath + defer func() { GetFingerprintPath = originalGetPath }() + + // Test with non-existent path + GetFingerprintPath = func() (string, error) { + return "/nonexistent/fingerprints.json", nil + } + + config := &Config{} + err := config.loadFingerprints() + if err == nil { + t.Error("Expected error when fingerprints file doesn't exist") + } +} + +func TestConfigUserAgent(t *testing.T) { + tests := []struct { + name string + userAgent string + }{ + { + name: "default user agent", + userAgent: "", + }, + { + name: "custom user agent", + userAgent: "CustomBot/1.0", + }, + { + name: "browser user agent", + userAgent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64)", + }, + } + + for _, tt := range tests { + 
t.Run(tt.name, func(t *testing.T) { + config := &Config{ + UserAgent: tt.userAgent, + } + if config.UserAgent != tt.userAgent { + t.Errorf("UserAgent = %q, want %q", config.UserAgent, tt.userAgent) + } + }) + } +} + +func TestHTTPClientConnectionPooling(t *testing.T) { + config := &Config{ + Concurrency: 50, + Timeout: 10, + VerifySSL: false, + } + + config.initHTTPClient() + + if config.client == nil { + t.Fatal("HTTP client not initialized") + } + + // Verify transport is configured + transport := config.client.Transport + if transport == nil { + t.Fatal("HTTP transport not configured") + } +} + +func TestTimeoutConfiguration(t *testing.T) { + tests := []struct { + name string + timeoutSeconds int + expectedTimeout time.Duration + }{ + { + name: "1 second timeout", + timeoutSeconds: 1, + expectedTimeout: 1 * time.Second, + }, + { + name: "10 second timeout", + timeoutSeconds: 10, + expectedTimeout: 10 * time.Second, + }, + { + name: "60 second timeout", + timeoutSeconds: 60, + expectedTimeout: 60 * time.Second, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + config := &Config{ + Timeout: tt.timeoutSeconds, + Concurrency: 10, + } + config.initHTTPClient() + + if config.client.Timeout != tt.expectedTimeout { + t.Errorf("Expected timeout %v, got %v", tt.expectedTimeout, config.client.Timeout) + } + }) + } +} diff --git a/runner/download.go b/runner/download.go index c499d7d..8bdcfbd 100644 --- a/runner/download.go +++ b/runner/download.go @@ -16,21 +16,21 @@ import ( var ( fingerprintPath = "https://raw.githubusercontent.com/LukaSikic/subzy/master/runner/fingerprints.json" subzyDir = "subzy" -) - -func GetFingerprintPath() (string, error) { - home, err := homedir.Dir() - if err != nil { - return "", fmt.Errorf("GetFingerprintPath: %v", err) - } - dirPath := filepath.Join(home, subzyDir) - if _, err := os.Stat(dirPath); errors.Is(err, fs.ErrNotExist) { - if err := os.Mkdir(dirPath, os.ModePerm); err != nil { - return "", err + // 
GetFingerprintPath is a variable to allow overriding in tests + GetFingerprintPath = func() (string, error) { + home, err := homedir.Dir() + if err != nil { + return "", fmt.Errorf("GetFingerprintPath: %v", err) } + dirPath := filepath.Join(home, subzyDir) + if _, err := os.Stat(dirPath); errors.Is(err, fs.ErrNotExist) { + if err := os.Mkdir(dirPath, os.ModePerm); err != nil { + return "", err + } + } + return path.Join(dirPath, "fingerprints.json"), nil } - return path.Join(dirPath, "fingerprints.json"), nil -} +) func downloadFingerprints(fingerprintsPath string) error { out, err := os.OpenFile(fingerprintsPath, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, os.ModePerm) @@ -47,7 +47,7 @@ func downloadFingerprints(fingerprintsPath string) error { _, err = io.Copy(out, resp.Body) if err != nil { - fmt.Errorf("downloadFingerprints: %v", err) + return fmt.Errorf("downloadFingerprints: %v", err) } return nil diff --git a/runner/fingerprints.go b/runner/fingerprints.go index dbe335a..49cf939 100644 --- a/runner/fingerprints.go +++ b/runner/fingerprints.go @@ -7,12 +7,12 @@ import ( ) type Fingerprint struct { - Engine string - Status string - Fingerprint string - Discussion string - Documentation string - False_Positive []string + Engine string `json:"engine"` + Status string `json:"status"` + Fingerprint string `json:"fingerprint"` + Discussion string `json:"discussion"` + Documentation string `json:"documentation"` + FalsePositive []string `json:"false_positive"` } func Fingerprints() ([]Fingerprint, error) { diff --git a/runner/fingerprints_test.go b/runner/fingerprints_test.go new file mode 100644 index 0000000..ea3ed8d --- /dev/null +++ b/runner/fingerprints_test.go @@ -0,0 +1,171 @@ +package runner + +import ( + "encoding/json" + "os" + "path/filepath" + "testing" +) + +func TestFingerprintsLoading(t *testing.T) { + // Create a temporary directory for test + tmpDir := t.TempDir() + + // Create a test fingerprints.json file + testFingerprints := []Fingerprint{ + { + Engine: 
"TestEngine1", + Status: "vulnerable", + Fingerprint: "error-message-1", + Discussion: "https://example.com/discussion1", + Documentation: "https://example.com/docs1", + FalsePositive: []string{"false-positive-1"}, + }, + { + Engine: "TestEngine2", + Status: "vulnerable", + Fingerprint: "error-message-2", + Discussion: "https://example.com/discussion2", + Documentation: "https://example.com/docs2", + FalsePositive: []string{}, + }, + } + + testData, err := json.MarshalIndent(testFingerprints, "", " ") + if err != nil { + t.Fatalf("Failed to marshal test data: %v", err) + } + + testFile := filepath.Join(tmpDir, "fingerprints.json") + if err := os.WriteFile(testFile, testData, 0644); err != nil { + t.Fatalf("Failed to write test file: %v", err) + } + + // Save original function and restore after test + originalGetPath := GetFingerprintPath + defer func() { GetFingerprintPath = originalGetPath }() + + // Override GetFingerprintPath to return our test path + GetFingerprintPath = func() (string, error) { + return testFile, nil + } + + // Test loading fingerprints + fingerprints, err := Fingerprints() + if err != nil { + t.Fatalf("Fingerprints() error = %v", err) + } + + if len(fingerprints) != 2 { + t.Errorf("Expected 2 fingerprints, got %d", len(fingerprints)) + } + + // Verify first fingerprint + if fingerprints[0].Engine != "TestEngine1" { + t.Errorf("Expected Engine 'TestEngine1', got %q", fingerprints[0].Engine) + } + + if fingerprints[0].Fingerprint != "error-message-1" { + t.Errorf("Expected Fingerprint 'error-message-1', got %q", fingerprints[0].Fingerprint) + } + + if len(fingerprints[0].FalsePositive) != 1 { + t.Errorf("Expected 1 false positive, got %d", len(fingerprints[0].FalsePositive)) + } + + // Verify second fingerprint + if fingerprints[1].Engine != "TestEngine2" { + t.Errorf("Expected Engine 'TestEngine2', got %q", fingerprints[1].Engine) + } + + if len(fingerprints[1].FalsePositive) != 0 { + t.Errorf("Expected 0 false positives, got %d", 
len(fingerprints[1].FalsePositive)) + } +} + +func TestFingerprintsFileNotFound(t *testing.T) { + // Save original function and restore after test + originalGetPath := GetFingerprintPath + defer func() { GetFingerprintPath = originalGetPath }() + + // Override to return non-existent path + GetFingerprintPath = func() (string, error) { + return "/nonexistent/path/fingerprints.json", nil + } + + _, err := Fingerprints() + if err == nil { + t.Error("Expected error when fingerprints file doesn't exist") + } +} + +func TestFingerprintsInvalidJSON(t *testing.T) { + tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "fingerprints.json") + + // Write invalid JSON + if err := os.WriteFile(testFile, []byte("invalid json {{{"), 0644); err != nil { + t.Fatalf("Failed to write test file: %v", err) + } + + // Save original function and restore after test + originalGetPath := GetFingerprintPath + defer func() { GetFingerprintPath = originalGetPath }() + + GetFingerprintPath = func() (string, error) { + return testFile, nil + } + + _, err := Fingerprints() + if err == nil { + t.Error("Expected error when fingerprints file contains invalid JSON") + } +} + +func TestFingerprintStructTags(t *testing.T) { + // Test that JSON tags work correctly + jsonData := `[{ + "engine": "TestEngine", + "status": "vulnerable", + "fingerprint": "test-fp", + "discussion": "test-discussion", + "documentation": "test-docs", + "false_positive": ["fp1", "fp2"] + }]` + + var fingerprints []Fingerprint + err := json.Unmarshal([]byte(jsonData), &fingerprints) + if err != nil { + t.Fatalf("Failed to unmarshal JSON: %v", err) + } + + if len(fingerprints) != 1 { + t.Fatalf("Expected 1 fingerprint, got %d", len(fingerprints)) + } + + fp := fingerprints[0] + + tests := []struct { + name string + got string + expected string + }{ + {"Engine", fp.Engine, "TestEngine"}, + {"Status", fp.Status, "vulnerable"}, + {"Fingerprint", fp.Fingerprint, "test-fp"}, + {"Discussion", fp.Discussion, "test-discussion"}, + 
{"Documentation", fp.Documentation, "test-docs"}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if tt.got != tt.expected { + t.Errorf("%s = %q, want %q", tt.name, tt.got, tt.expected) + } + }) + } + + if len(fp.FalsePositive) != 2 { + t.Errorf("Expected 2 false positives, got %d", len(fp.FalsePositive)) + } +} diff --git a/runner/helpers.go b/runner/helpers.go index 970c6ad..7143176 100644 --- a/runner/helpers.go +++ b/runner/helpers.go @@ -3,7 +3,7 @@ package runner import "net/url" func isEnabled(setting bool) string { - if setting == true { + if setting { return "[ Yes ]" } return "[ No ]" @@ -11,11 +11,7 @@ func isEnabled(setting bool) string { func isValidUrl(toTest string) bool { _, err := url.ParseRequestURI(toTest) - if err != nil { - return false - } else { - return true - } + return err == nil } type subdomainResult struct { diff --git a/runner/helpers_test.go b/runner/helpers_test.go new file mode 100644 index 0000000..e217dfa --- /dev/null +++ b/runner/helpers_test.go @@ -0,0 +1,94 @@ +package runner + +import "testing" + +func TestIsEnabled(t *testing.T) { + tests := []struct { + name string + setting bool + expected string + }{ + { + name: "enabled returns Yes", + setting: true, + expected: "[ Yes ]", + }, + { + name: "disabled returns No", + setting: false, + expected: "[ No ]", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := isEnabled(tt.setting) + if result != tt.expected { + t.Errorf("isEnabled(%v) = %q, want %q", tt.setting, result, tt.expected) + } + }) + } +} + +func TestIsValidUrl(t *testing.T) { + tests := []struct { + name string + url string + expected bool + }{ + { + name: "valid http URL", + url: "http://example.com", + expected: true, + }, + { + name: "valid https URL", + url: "https://example.com", + expected: true, + }, + { + name: "valid URL with path", + url: "https://example.com/path/to/resource", + expected: true, + }, + { + name: "valid URL with port", + url: 
"http://example.com:8080", + expected: true, + }, + { + name: "invalid URL without scheme", + url: "example.com", + expected: false, + }, + { + name: "invalid URL with spaces", + url: "http://exam ple.com", + expected: false, + }, + { + name: "empty string", + url: "", + expected: false, + }, + { + name: "subdomain without scheme", + url: "subdomain.example.com", + expected: false, + }, + { + name: "valid URL with subdomain", + url: "https://subdomain.example.com", + expected: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := isValidUrl(tt.url) + if result != tt.expected { + t.Errorf("isValidUrl(%q) = %v, want %v", tt.url, result, tt.expected) + } + }) + } +} diff --git a/runner/logger.go b/runner/logger.go new file mode 100644 index 0000000..94042f0 --- /dev/null +++ b/runner/logger.go @@ -0,0 +1,141 @@ +package runner + +import ( + "fmt" + "io" + "os" + "time" + + "github.com/rs/zerolog" + "gopkg.in/Graylog2/go-gelf.v2/gelf" +) + +// LogConfig holds logging configuration +type LogConfig struct { + Level string // debug, info, warn, error + Format string // json, console + GraylogHost string // e.g., "graylog.example.com:12201" + GraylogApp string // Application name + EnableFile bool // Log to file + FilePath string // Log file path +} + +// InitLogger initializes and configures the logger with multiple outputs +func InitLogger(cfg LogConfig) (zerolog.Logger, error) { + var writers []io.Writer + + // Console output (development/debugging) + if cfg.Format == "console" || cfg.Format == "" { + consoleWriter := zerolog.ConsoleWriter{ + Out: os.Stdout, + TimeFormat: "15:04:05", + NoColor: false, + } + writers = append(writers, consoleWriter) + } else if cfg.Format == "json" { + // JSON output to stdout + writers = append(writers, os.Stdout) + } + + // Graylog GELF output (production monitoring) + if cfg.GraylogHost != "" { + gelfWriter, err := gelf.NewUDPWriter(cfg.GraylogHost) + if err != nil { + return zerolog.Logger{}, 
fmt.Errorf("failed to create Graylog writer: %w", err) + } + + // Set Graylog facility (application name) + if cfg.GraylogApp != "" { + gelfWriter.Facility = cfg.GraylogApp + } else { + gelfWriter.Facility = "subzy" + } + + // Wrap GELF writer to convert zerolog format to GELF + writers = append(writers, &gelfLogWriter{writer: gelfWriter}) + } + + // File output + if cfg.EnableFile && cfg.FilePath != "" { + file, err := os.OpenFile(cfg.FilePath, + os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666) + if err != nil { + return zerolog.Logger{}, fmt.Errorf("failed to open log file: %w", err) + } + writers = append(writers, file) + } + + // If no writers configured, default to console + if len(writers) == 0 { + writers = append(writers, zerolog.ConsoleWriter{ + Out: os.Stdout, + TimeFormat: "15:04:05", + }) + } + + // Create multi-writer + multi := zerolog.MultiLevelWriter(writers...) + + // Create logger with timestamp and app name + logger := zerolog.New(multi).With(). + Timestamp(). + Str("app", cfg.GraylogApp). 
+ Logger() + + // Set log level + level, err := zerolog.ParseLevel(cfg.Level) + if err != nil { + level = zerolog.InfoLevel + } + logger = logger.Level(level) + + return logger, nil +} + +// gelfLogWriter wraps a GELF writer to make it compatible with zerolog +type gelfLogWriter struct { + writer *gelf.UDPWriter +} + +// Write implements io.Writer for GELF +func (w *gelfLogWriter) Write(p []byte) (n int, err error) { + // Create GELF message + msg := &gelf.Message{ + Version: "1.1", + Host: getHostname(), + Short: string(p), + Full: string(p), + TimeUnix: float64(time.Now().Unix()), + Level: 6, // Info level by default + Extra: map[string]interface{}{ + "_facility": "subzy", + }, + } + + // Send to Graylog + if err := w.writer.WriteMessage(msg); err != nil { + return 0, err + } + + return len(p), nil +} + +// getHostname returns the hostname or "unknown" if unavailable +func getHostname() string { + hostname, err := os.Hostname() + if err != nil { + return "unknown" + } + return hostname +} + +// DefaultLogger creates a default console logger +func DefaultLogger() zerolog.Logger { + return zerolog.New(zerolog.ConsoleWriter{ + Out: os.Stdout, + TimeFormat: "15:04:05", + }).With(). + Timestamp(). + Str("app", "subzy"). 
+ Logger() +} diff --git a/runner/process.go b/runner/process.go index 670be9c..9752ffe 100644 --- a/runner/process.go +++ b/runner/process.go @@ -12,43 +12,75 @@ import ( ) func Process(config *Config) error { - - fingerprints, err := Fingerprints() + // Initialize logger + logger, err := InitLogger(LogConfig{ + Level: config.LogLevel, + Format: config.LogFormat, + GraylogHost: config.GraylogHost, + GraylogApp: config.GraylogApp, + EnableFile: config.LogToFile, + FilePath: config.LogFilePath, + }) if err != nil { - return fmt.Errorf("Process: %v", err) + return fmt.Errorf("failed to initialize logger: %w", err) } + config.logger = logger config.initHTTPClient() - config.loadFingerprints() + if err := config.loadFingerprints(); err != nil { + return fmt.Errorf("Process: %v", err) + } subdomains := getSubdomains(config) - fmt.Println("[ * ]", "Loaded", len(subdomains), "targets") - fmt.Println("[ * ]", "Loaded", len(fingerprints), "fingerprints") - if config.Output != "" { - fmt.Printf("[ * ] Output filename: %s\n", config.Output) - fmt.Println(isEnabled(config.OnlyVuln), "Save only vulnerable subdomains") - } + // Log scan configuration + logger.Info(). + Int("target_count", len(subdomains)). + Int("fingerprint_count", len(config.fingerprints)). + Str("output_file", config.Output). + Bool("only_vulnerable", config.OnlyVuln). + Bool("https_default", config.HTTPS). + Int("concurrency", config.Concurrency). + Bool("verify_ssl", config.VerifySSL). + Int("timeout_seconds", config.Timeout). + Bool("hide_fails", config.HideFails). 
+ Msg("Starting subdomain takeover scan") + + // Keep console output for user feedback (when not in JSON mode) + if config.LogFormat != "json" { + fmt.Println("[ * ]", "Loaded", len(subdomains), "targets") + fmt.Println("[ * ]", "Loaded", len(config.fingerprints), "fingerprints") + if config.Output != "" { + fmt.Printf("[ * ] Output filename: %s\n", config.Output) + fmt.Println(isEnabled(config.OnlyVuln), "Save only vulnerable subdomains") + } - fmt.Println(isEnabled(config.HTTPS), "HTTPS by default (--https)") - fmt.Println("[", config.Concurrency, "]", "Concurrent requests (--concurrency)") - fmt.Println(isEnabled(config.VerifySSL), "Check target only if SSL is valid (--verify_ssl)") - fmt.Println("[", config.Timeout, "]", "HTTP request timeout (in seconds) (--timeout)") - fmt.Println(isEnabled(config.HideFails), "Show only potentially vulnerable subdomains (--hide_fails)") + fmt.Println(isEnabled(config.HTTPS), "HTTPS by default (--https)") + fmt.Println("[", config.Concurrency, "]", "Concurrent requests (--concurrency)") + fmt.Println(isEnabled(config.VerifySSL), "Check target only if SSL is valid (--verify_ssl)") + fmt.Println("[", config.Timeout, "]", "HTTP request timeout (in seconds) (--timeout)") + fmt.Println(isEnabled(config.HideFails), "Show only potentially vulnerable subdomains (--hide_fails)") + } - subdomainCh := make(chan string, config.Concurrency+5) + subdomainCh := make(chan string, config.Concurrency*2) resCh := make(chan *subdomainResult, config.Concurrency) var wg sync.WaitGroup wg.Add(config.Concurrency) var results []*subdomainResult + var resultsMu sync.Mutex + var resultsWg sync.WaitGroup + resultsWg.Add(1) go func() { + defer resultsWg.Done() for r := range resCh { if config.Output != "" { if config.OnlyVuln && r.Status != ResultVulnerable { continue } + resultsMu.Lock() results = append(results, r) + resultsMu.Unlock() } } }() @@ -66,6 +98,7 @@ func Process(config *Config) error { wg.Wait() close(resCh) + resultsWg.Wait() if 
config.Output != "" { f, err := os.OpenFile(config.Output, os.O_RDWR|os.O_CREATE|os.O_TRUNC, os.ModePerm) @@ -81,9 +114,17 @@ func Process(config *Config) error { return err } - fmt.Printf("[ * ] Saved output to %q\n", config.Output) + logger.Info(). + Str("output_file", config.Output). + Int("result_count", len(results)). + Msg("Saved scan results to file") + + if config.LogFormat != "json" { + fmt.Printf("[ * ] Saved output to %q\n", config.Output) + } } + logger.Info().Msg("Scan completed") return nil } @@ -96,28 +137,49 @@ func processor(subdomainCh chan string, resCh chan *subdomainResult, c *Config, Status: string(result.resStatus), Engine: result.entry.Engine, Documentation: result.entry.Documentation, + Discussion: result.entry.Discussion, } if result.status == aurora.Green("VULNERABLE") { - fmt.Print("-----------------\r\n") - fmt.Println("[ ", result.status, " ]", " - ", subdomain, " [ ", result.entry.Engine, " ] ") - fmt.Println("[ ", aurora.Blue("DISCUSSION"), " ]", " - ", result.entry.Discussion) - fmt.Println("[ ", aurora.Blue("DOCUMENTATION"), " ]", " - ", result.entry.Documentation) - - fmt.Print("-----------------\r\n") - + // Log vulnerability with structured data + c.logger.Error(). + Str("subdomain", subdomain). + Str("status", "vulnerable"). + Str("engine", result.entry.Engine). + Str("documentation", result.entry.Documentation). + Str("discussion", result.entry.Discussion). + Msg("Vulnerable subdomain detected") + + // Console output for user + if c.LogFormat != "json" { + fmt.Print("-----------------\r\n") + fmt.Println("[ ", result.status, " ]", " - ", subdomain, " [ ", result.entry.Engine, " ] ") + fmt.Println("[ ", aurora.Blue("DISCUSSION"), " ]", " - ", result.entry.Discussion) + fmt.Println("[ ", aurora.Blue("DOCUMENTATION"), " ]", " - ", result.entry.Documentation) + fmt.Print("-----------------\r\n") + } } else { - if !c.HideFails { + // Log check result + if result.resStatus == ResultHTTPError { + c.logger.Warn(). 
+ Str("subdomain", subdomain). + Str("status", string(result.resStatus)). + Msg("HTTP error checking subdomain") + } else { + c.logger.Debug(). + Str("subdomain", subdomain). + Str("status", string(result.resStatus)). + Msg("Subdomain check completed") + } + + // Console output + if !c.HideFails && c.LogFormat != "json" { fmt.Println("[ ", result.status, " ]", " - ", subdomain) } } } } -func generator(subdomain string, subdomainCh chan string) { - subdomainCh <- subdomain -} - func getSubdomains(c *Config) []string { if c.Target == "" { subdomains, err := readSubdomains(c.Targets) diff --git a/runner/reader_test.go b/runner/reader_test.go new file mode 100644 index 0000000..804fd0e --- /dev/null +++ b/runner/reader_test.go @@ -0,0 +1,138 @@ +package runner + +import ( + "fmt" + "os" + "path/filepath" + "testing" +) + +func TestReadSubdomains(t *testing.T) { + tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "subdomains.txt") + + // Create test file with subdomains + content := "subdomain1.example.com\nsubdomain2.example.com\nsubdomain3.example.com\n" + if err := os.WriteFile(testFile, []byte(content), 0644); err != nil { + t.Fatalf("Failed to write test file: %v", err) + } + + subdomains, err := readSubdomains(testFile) + if err != nil { + t.Fatalf("readSubdomains() error = %v", err) + } + + expectedCount := 3 + if len(subdomains) != expectedCount { + t.Errorf("Expected %d subdomains, got %d", expectedCount, len(subdomains)) + } + + expected := []string{ + "subdomain1.example.com", + "subdomain2.example.com", + "subdomain3.example.com", + } + + for i, subdomain := range subdomains { + if subdomain != expected[i] { + t.Errorf("Subdomain[%d] = %q, want %q", i, subdomain, expected[i]) + } + } +} + +func TestReadSubdomainsEmptyFile(t *testing.T) { + tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "empty.txt") + + // Create empty file + if err := os.WriteFile(testFile, []byte(""), 0644); err != nil { + t.Fatalf("Failed to write test file: %v", err) + 
} + + subdomains, err := readSubdomains(testFile) + if err != nil { + t.Fatalf("readSubdomains() error = %v", err) + } + + if len(subdomains) != 0 { + t.Errorf("Expected 0 subdomains from empty file, got %d", len(subdomains)) + } +} + +func TestReadSubdomainsWithEmptyLines(t *testing.T) { + tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "subdomains.txt") + + // Create test file with empty lines + content := "subdomain1.example.com\n\nsubdomain2.example.com\n\n\nsubdomain3.example.com\n" + if err := os.WriteFile(testFile, []byte(content), 0644); err != nil { + t.Fatalf("Failed to write test file: %v", err) + } + + subdomains, err := readSubdomains(testFile) + if err != nil { + t.Fatalf("readSubdomains() error = %v", err) + } + + // Empty lines are included as empty strings + expectedCount := 6 + if len(subdomains) != expectedCount { + t.Errorf("Expected %d entries (including empty lines), got %d", expectedCount, len(subdomains)) + } +} + +func TestReadSubdomainsFileNotFound(t *testing.T) { + _, err := readSubdomains("/nonexistent/path/file.txt") + if err == nil { + t.Error("Expected error when file doesn't exist") + } +} + +func TestReadSubdomainsWithWhitespace(t *testing.T) { + tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "subdomains.txt") + + // Create test file with whitespace + content := " subdomain1.example.com \nsubdomain2.example.com\n\tsubdomain3.example.com\t\n" + if err := os.WriteFile(testFile, []byte(content), 0644); err != nil { + t.Fatalf("Failed to write test file: %v", err) + } + + subdomains, err := readSubdomains(testFile) + if err != nil { + t.Fatalf("readSubdomains() error = %v", err) + } + + // Scanner preserves whitespace + if subdomains[0] != " subdomain1.example.com " { + t.Errorf("Expected whitespace to be preserved, got %q", subdomains[0]) + } +} + +func TestReadSubdomainsLargeFile(t *testing.T) { + tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "large.txt") + + // Create a file with many subdomains + var 
content string + count := 1000 + for i := 0; i < count; i++ { + if i > 0 { + content += "\n" + } + content += fmt.Sprintf("subdomain%d.example.com", i) + } + + if err := os.WriteFile(testFile, []byte(content), 0644); err != nil { + t.Fatalf("Failed to write test file: %v", err) + } + + subdomains, err := readSubdomains(testFile) + if err != nil { + t.Fatalf("readSubdomains() error = %v", err) + } + + if len(subdomains) != count { + t.Errorf("Expected %d subdomains, got %d", count, len(subdomains)) + } +} diff --git a/runner/worker.go b/runner/worker.go index 3516cf4..3d67c31 100644 --- a/runner/worker.go +++ b/runner/worker.go @@ -1,6 +1,8 @@ package runner import ( + "net/http" + "github.com/logrusorgru/aurora" "io" "strings" @@ -22,7 +24,7 @@ type Result struct { } func (c *Config) checkSubdomain(subdomain string) Result { - if isValidUrl(subdomain) == false { + if !isValidUrl(subdomain) { if c.HTTPS { subdomain = "https://" + subdomain } else { @@ -30,16 +32,29 @@ func (c *Config) checkSubdomain(subdomain string) Result { } } - resp, err := c.client.Get(subdomain) + req, err := http.NewRequest("GET", subdomain, nil) if err != nil { return Result{ResultHTTPError, aurora.Red("HTTP ERROR"), Fingerprint{}} } - body, err := io.ReadAll(resp.Body) + + // Set User-Agent if configured + if c.UserAgent != "" { + req.Header.Set("User-Agent", c.UserAgent) + } else { + req.Header.Set("User-Agent", "Subzy/1.1.0 (Subdomain Takeover Scanner)") + } + + resp, err := c.client.Do(req) if err != nil { - resp.Body.Close() - return Result{ResultResponseError, aurora.Red("RESPONSE ERROR"), Fingerprint{}} + return Result{ResultHTTPError, aurora.Red("HTTP ERROR"), Fingerprint{}} } + // Limit response body to 1MB to prevent memory exhaustion + limitedBody := io.LimitReader(resp.Body, 1024*1024) + body, err := io.ReadAll(limitedBody) resp.Body.Close() + if err != nil { + return Result{ResultResponseError, aurora.Red("RESPONSE ERROR"), Fingerprint{}} + } return c.matchResponse(string(body)) 
} @@ -47,10 +62,9 @@ func (c *Config) checkSubdomain(subdomain string) Result { func (c *Config) matchResponse(body string) Result { for _, fingerprint := range c.fingerprints { if strings.Contains(body, fingerprint.Fingerprint) { - for _, false_positive_string := range fingerprint.False_Positive { - if len(string(false_positive_string)) > 0 { - - if strings.Contains(body, string(false_positive_string)) { + for _, falsePositiveString := range fingerprint.FalsePositive { + if len(falsePositiveString) > 0 { + if strings.Contains(body, falsePositiveString) { return Result{ResultNotVulnerable, aurora.Red("NOT VULNERABLE"), Fingerprint{}} } } diff --git a/runner/worker_test.go b/runner/worker_test.go new file mode 100644 index 0000000..6519b5d --- /dev/null +++ b/runner/worker_test.go @@ -0,0 +1,190 @@ +package runner + +import ( + "net/http" + "net/http/httptest" + "testing" +) + +func TestMatchResponse(t *testing.T) { + config := &Config{ + fingerprints: []Fingerprint{ + { + Engine: "TestService", + Fingerprint: "unique-error-message", + FalsePositive: []string{}, + Discussion: "https://example.com/discussion", + Documentation: "https://example.com/docs", + }, + { + Engine: "FalsePositiveService", + Fingerprint: "error-occurred", + FalsePositive: []string{"but-its-ok"}, + Discussion: "https://example.com/discussion2", + Documentation: "https://example.com/docs2", + }, + }, + } + + tests := []struct { + name string + body string + expectedStatus resultStatus + }{ + { + name: "vulnerable when fingerprint matches", + body: "This page contains unique-error-message", + expectedStatus: ResultVulnerable, + }, + { + name: "not vulnerable when no fingerprint matches", + body: "This page has no matching fingerprint", + expectedStatus: ResultNotVulnerable, + }, + { + name: "not vulnerable when false positive detected", + body: "error-occurred but-its-ok", + expectedStatus: ResultNotVulnerable, + }, + { + name: "vulnerable when fingerprint matches without false positive", + body: 
"error-occurred without the ok string", + expectedStatus: ResultVulnerable, + }, + { + name: "empty body returns not vulnerable", + body: "", + expectedStatus: ResultNotVulnerable, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := config.matchResponse(tt.body) + if result.resStatus != tt.expectedStatus { + t.Errorf("matchResponse() status = %v, want %v", result.resStatus, tt.expectedStatus) + } + }) + } +} + +func TestCheckSubdomain(t *testing.T) { + // Create a test HTTP server + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Check User-Agent header + ua := r.Header.Get("User-Agent") + if ua == "" { + t.Error("User-Agent header not set") + } + w.WriteHeader(http.StatusOK) + w.Write([]byte("test-fingerprint")) + })) + defer ts.Close() + + config := &Config{ + HTTPS: false, + VerifySSL: false, + Timeout: 10, + fingerprints: []Fingerprint{ + { + Engine: "TestEngine", + Fingerprint: "test-fingerprint", + }, + }, + } + config.initHTTPClient() + + tests := []struct { + name string + subdomain string + expectedStatus resultStatus + }{ + { + name: "valid URL with server response", + subdomain: ts.URL, + expectedStatus: ResultVulnerable, + }, + { + name: "URL without scheme adds http", + subdomain: ts.URL[7:], // Remove http:// + expectedStatus: ResultVulnerable, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := config.checkSubdomain(tt.subdomain) + if result.resStatus != tt.expectedStatus { + t.Errorf("checkSubdomain() status = %v, want %v", result.resStatus, tt.expectedStatus) + } + }) + } +} + +func TestCheckSubdomainHTTPError(t *testing.T) { + config := &Config{ + HTTPS: false, + VerifySSL: false, + Timeout: 1, + } + config.initHTTPClient() + + result := config.checkSubdomain("http://nonexistent-domain-12345.invalid") + if result.resStatus != ResultHTTPError { + t.Errorf("checkSubdomain() with invalid domain should return HTTP error, got %v", 
result.resStatus) + } +} + +func TestCheckSubdomainWithHTTPSFlag(t *testing.T) { + config := &Config{ + HTTPS: true, + VerifySSL: false, + Timeout: 10, + fingerprints: []Fingerprint{ + { + Engine: "TestEngine", + Fingerprint: "not-present", + }, + }, + } + config.initHTTPClient() + + // Test that HTTPS flag is respected + // This will fail with connection error, but we're just checking the URL construction + subdomain := "example.com" + result := config.checkSubdomain(subdomain) + + // Should get an HTTP error trying to connect + if result.resStatus != ResultHTTPError { + t.Errorf("Expected HTTP error for unreachable HTTPS domain, got %v", result.resStatus) + } +} + +func TestResponseBodyLimit(t *testing.T) { + // Create a test server that returns a very large response + ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Write 2MB of data (more than the 1MB limit) + data := make([]byte, 2*1024*1024) + for i := range data { + data[i] = 'A' + } + w.Write(data) + })) + defer ts.Close() + + config := &Config{ + HTTPS: false, + VerifySSL: false, + Timeout: 10, + fingerprints: []Fingerprint{}, + } + config.initHTTPClient() + + // Should not cause memory issues due to 1MB limit + result := config.checkSubdomain(ts.URL) + + // Should complete without error + if result.resStatus == ResultResponseError { + t.Error("Should handle large responses without error") + } +}